# Explicit Indexes

Indexes are a fundamental pandas concept: they are the labels that identify the rows of a data structure such as a Series or DataFrame. These labels enable efficient data access, alignment, and merging. Index values usually identify rows uniquely, although pandas does not require them to be unique. The pandas library is built on top of NumPy and provides powerful indexing capabilities, making it an essential tool for data analysis and manipulation.

In [31]:
import pandas as pd

In [32]:
# Load the example spreadsheet into a DataFrame.
# NOTE(review): the path is absolute and specific to one machine; point it at
# your own copy of database.xlsx before running.
path = r"C:\Users\Alysson\Documents\GitHub\Pandas-in-Python\database.xlsx"
data = pd.read_excel(path)

In [33]:
# Column labels of the DataFrame; they are themselves stored as an Index.
data.columns

Index(['name', 'breed', 'color', 'height_cm', 'weight_kg', 'age'], dtype='object')

In [34]:
# Row labels. No index column was chosen when loading, so this is the
# default integer RangeIndex.
data.index

RangeIndex(start=0, stop=5, step=1)

### Setting a column as the index

In [35]:
# set_index returns a new DataFrame whose row labels come from the "name"
# column; `data` itself is left unchanged.
data_ind = data.set_index("name")
data_ind  # display the result

Unnamed: 0_level_0,breed,color,height_cm,weight_kg,age
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Paçoca,Labrador,Brown,56,25,5
Ivo,Poodle,Black,43,22,4
Lola,Schnauzer,Gray,49,23,4
Maracatu,King Cavalier,Brown,43,21,3
Chantal,Labrador,Black,59,29,6


In [36]:
# Go back to the default integer index. drop=True discards the old "name"
# index instead of restoring it as a column. The result is not assigned, so
# data_ind keeps its "name" index.
data_ind.reset_index(drop=True)

Unnamed: 0,breed,color,height_cm,weight_kg,age
0,Labrador,Brown,56,25,5
1,Poodle,Black,43,22,4
2,Schnauzer,Gray,49,23,4
3,King Cavalier,Brown,43,21,3
4,Labrador,Black,59,29,6


### Subsetting

In [37]:
# With the default integer index, selecting rows by name needs a boolean
# mask built from the "name" column.
data[data["name"].isin(["Ivo","Maracatu"])]

Unnamed: 0,name,breed,color,height_cm,weight_kg,age
1,Ivo,Poodle,Black,43,22,4
3,Maracatu,King Cavalier,Brown,43,21,3


In [38]:
# With "name" as the index, the label-based .loc indexer selects the same
# rows directly from a list of labels.

data_ind.loc[["Ivo","Maracatu"]]

Unnamed: 0_level_0,breed,color,height_cm,weight_kg,age
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ivo,Poodle,Black,43,22,4
Maracatu,King Cavalier,Brown,43,21,3


### Multi-level indexes

In [39]:
# Passing a list of columns to set_index builds a multi-level (hierarchical)
# index: "breed" is the outer level and "color" is the inner level.
data_ind2=data.set_index(["breed","color"])
data_ind2  # display the result

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Paçoca,56,25,5
Poodle,Black,Ivo,43,22,4
Schnauzer,Gray,Lola,49,23,4
King Cavalier,Brown,Maracatu,43,21,3
Labrador,Black,Chantal,59,29,6


In [40]:
# Subset the outer level with a list: plain labels are matched against the
# outer level ("breed"), so every color of those breeds is returned.

data_ind2.loc[["Labrador","Poodle"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Paçoca,56,25,5
Labrador,Black,Chantal,59,29,6
Poodle,Black,Ivo,43,22,4


In [41]:
# Subset inner levels with a list of tuples: each tuple names one
# (breed, color) combination to select.

data_ind2.loc[[("Labrador","Brown"),("Poodle","Black")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Paçoca,56,25,5
Poodle,Black,Ivo,43,22,4


### Sorting by index values

In [42]:
# With no arguments, sort_index sorts by every index level, from outer to
# inner, in ascending order.
data_ind2.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
King Cavalier,Brown,Maracatu,43,21,3
Labrador,Black,Chantal,59,29,6
Labrador,Brown,Paçoca,56,25,5
Poodle,Black,Ivo,43,22,4
Schnauzer,Gray,Lola,49,23,4


In [43]:
# Sort by "color" ascending first, then by "breed" descending within each
# color; the `level` and `ascending` lists are matched by position.
data_ind2.sort_index(level=["color","breed"],ascending=[True,False])

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Poodle,Black,Ivo,43,22,4
Labrador,Black,Chantal,59,29,6
Labrador,Brown,Paçoca,56,25,5
King Cavalier,Brown,Maracatu,43,21,3
Schnauzer,Gray,Lola,49,23,4


### Slicing and Subsetting with .loc and .iloc

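In pandas, you can use the .loc and .iloc indexers to slice and subset data from a DataFrame or Series. .loc selects rows and columns by label, while .iloc selects them by integer position. For example, `data_srt.iloc[1:3, 0:2]` returns the second and third rows and the first two columns; as with Python lists, the end position of an .iloc slice is excluded.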

In [45]:
# Build a sorted multi-level index. Label slices on a MultiIndex require the
# index to be sorted, so sort_index() is applied before slicing with .loc.
data_srt=data.set_index(["breed","color"]).sort_index()
data_srt  # display the result

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
King Cavalier,Brown,Maracatu,43,21,3
Labrador,Black,Chantal,59,29,6
Labrador,Brown,Paçoca,56,25,5
Poodle,Black,Ivo,43,22,4
Schnauzer,Gray,Lola,49,23,4


In [46]:
# Slicing the outer index level: both endpoints ("Labrador" and "Poodle")
# are included in the result.

data_srt.loc["Labrador":"Poodle"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Black,Chantal,59,29,6
Labrador,Brown,Paçoca,56,25,5
Poodle,Black,Ivo,43,22,4


Note: Label-based slicing with .loc includes both the first and the last label, unlike Python list slicing, which excludes the end position.

In [47]:
# Slicing the inner index levels: pass (outer, inner) tuples as the slice
# endpoints; both endpoints are included.

data_srt.loc[("Labrador","Brown"):("Poodle","Black")]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,age
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Paçoca,56,25,5
Poodle,Black,Ivo,43,22,4


In [50]:
# Slicing columns: ":" keeps every row, and the second argument slices the
# column labels from "name" through "weight_kg", inclusive.

data_srt.loc[:,"name":"weight_kg"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
King Cavalier,Brown,Maracatu,43,21
Labrador,Black,Chantal,59,29
Labrador,Brown,Paçoca,56,25
Poodle,Black,Ivo,43,22
Schnauzer,Gray,Lola,49,23


In [52]:
# Slice twice: rows (first argument) and columns (second argument) can be
# sliced in a single .loc call.

data_srt.loc[("Labrador","Brown"):("Poodle","Black"),"name":"weight_kg"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Labrador,Brown,Paçoca,56,25
Poodle,Black,Ivo,43,22


## Working with pivot tables

In [56]:
# Pivot: one row per breed, one column per color, cells holding weight_kg.
# pivot_table aggregates with the mean by default; fill_value=0 replaces the
# breed/color combinations that have no data.
# NOTE(review): those zeros are placeholders, not real weights, so they pull
# down any statistic computed on data_pivot afterwards.
data_pivot = data.pivot_table("weight_kg", index = "breed",columns="color",fill_value=0)
data_pivot  # display the result

color,Black,Brown,Gray
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
King Cavalier,0,21,0
Labrador,29,25,0
Poodle,22,0,0
Schnauzer,0,0,23


In [57]:
# The pivot table is a DataFrame indexed by "breed", so label slicing with
# .loc works on it too (both endpoints included).
data_pivot.loc["Labrador":"Poodle"]

color,Black,Brown,Gray
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Labrador,29,25,0
Poodle,22,0,0


In [58]:
# axis="index" computes the statistic down the rows, giving one mean per
# color column. The zeros introduced by fill_value=0 are included.
data_pivot.mean(axis="index")

color
Black    12.75
Brown    11.50
Gray      5.75
dtype: float64

In [59]:
# axis="columns" computes the statistic across the columns, giving one mean
# per breed row. The zeros introduced by fill_value=0 are included.
data_pivot.mean(axis="columns")

breed
King Cavalier     7.000000
Labrador         18.000000
Poodle            7.333333
Schnauzer         7.666667
dtype: float64