In [1]:
import numpy as np
import pandas as pd

# Hierarchical indexing (`MultiIndex`)

## Creating a `MultiIndex` (hierarchical index) object

In [2]:
# Create tuples to use for MultiIndex labels
arrays = [
    ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
    ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']
]

tuples = list(zip(*arrays))
tuples

[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]

In [3]:
# Create MultiIndex Series from tuples
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
s = pd.Series(np.random.randn(8), index=index)
s

first  second
bar    one       0.906944
       two      -0.499347
baz    one      -0.774320
       two       1.174559
foo    one       2.001864
       two       1.594525
qux    one      -0.198297
       two      -1.257977
dtype: float64

In [4]:
# Create MultiIndex Series from arrays
iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']]
s = pd.Series(np.random.randn(8), 
              index=pd.MultiIndex.from_product(iterables, names=['first', 'second']))
s

first  second
bar    one       0.576619
       two       0.317024
baz    one       0.572301
       two      -1.263939
foo    one      -0.366578
       two      -0.564358
qux    one       0.930409
       two       1.142612
dtype: float64

In [5]:
# Create DataFrame
df = pd.DataFrame(
    [
        ['bar', 'one'],
        ['bar', 'two'],
        ['foo', 'one'],
        ['foo', 'two']
    ],
    columns=['first', 'second']
)

df

Unnamed: 0,first,second
0,bar,one
1,bar,two
2,foo,one
3,foo,two


In [6]:
# Create MultiIndex from DataFrame
pd.MultiIndex.from_frame(df)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [7]:
# Create MultiIndex Series from arrays
s = pd.Series(np.random.randn(8), index=arrays)
s

bar  one    1.503375
     two   -0.306874
baz  one   -1.963563
     two   -0.607539
foo  one   -0.742961
     two    0.251203
qux  one   -0.559336
     two   -0.254505
dtype: float64

In [8]:
# Create MultiIndex DataFrame from arrays
df = pd.DataFrame(np.random.randn(8, 4), index=arrays)
df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,0.859481,0.036781,-0.304908,-1.65046
bar,two,-1.751111,-0.059967,0.575405,-1.488022
baz,one,0.964563,-0.079773,-0.059842,0.137496
baz,two,-0.811768,0.109502,1.317267,1.580568
foo,one,0.260526,-1.143358,0.185806,-2.334131
foo,two,-0.0452,0.570291,-0.514724,0.719557
qux,one,-0.060246,1.374907,1.256563,-1.436751
qux,two,0.513667,1.479812,0.340074,0.146956


In [9]:
# Get names of DataFrame MultiIndex levels
df.index.names

FrozenList([None, None])

In [10]:
# Create DataFrame with MultiIndex columns
df = pd.DataFrame(
    np.random.randn(3, 8),
    index=['A', 'B', 'C'],
    columns=index
)

df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-1.115344,0.33626,0.241983,1.302016,1.551178,0.651496,-0.45674,-0.287175
B,-1.972294,-1.262359,0.446274,0.075196,0.371826,-0.69923,-0.199597,-0.335364
C,0.65868,1.955271,1.278968,-0.049237,-0.433633,-1.82377,-0.116647,0.170029


In [11]:
# Create DataFrame with MultiIndex rows and columns
pd.DataFrame(np.random.randn(6, 6), index=index[:6], columns=index[:6])

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,-0.715445,0.474128,1.235782,-0.691349,0.753783,-0.340002
bar,two,0.993901,-0.231742,2.163862,0.352627,-1.351652,-0.480163
baz,one,0.66889,-0.991005,-0.949487,0.40649,-2.97841,-0.585242
baz,two,0.204696,2.361008,-1.316903,1.26705,-0.610187,-0.009712
foo,one,1.800261,0.25858,0.086617,-1.007929,0.956717,0.145956
foo,two,-0.798976,-0.37383,-1.234155,0.350216,0.46754,1.926585


In [12]:
# Create Series with a single Index of tuples
pd.Series(np.random.randn(8), index=tuples)

(bar, one)    0.418470
(bar, two)   -0.216292
(baz, one)   -0.368555
(baz, two)   -0.082154
(foo, one)   -2.265746
(foo, two)   -0.658835
(qux, one)    0.522134
(qux, two)    0.179716
dtype: float64

## Reconstructing the level labels

In [13]:
# Get level labels from highest level index
index.get_level_values(0)

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [14]:
# Get level labels from highest level index
index.get_level_values('first')

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [15]:
# Get level values from lowest level index
index.get_level_values(1)

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

In [16]:
# Get level values from lowest level index
index.get_level_values('second')

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

## Basic indexing on axis with `MultiIndex`

In [17]:
# Select DataFrame columns where first level label is 'bar'
df['bar']

second,one,two
A,-1.115344,0.33626
B,-1.972294,-1.262359
C,0.65868,1.955271


In [18]:
# Select DataFrame column where first level label is 'bar' and second level label is 'one'
df['bar', 'one']

A   -1.115344
B   -1.972294
C    0.658680
Name: (bar, one), dtype: float64

In [19]:
# Select DataFrame column where first level label is 'bar' and second level label is 'one'
df['bar']['one']

A   -1.115344
B   -1.972294
C    0.658680
Name: one, dtype: float64

In [20]:
# Select Series elements where first level label is 'qux'
s['qux']

one   -0.559336
two   -0.254505
dtype: float64

## Defined levels

In [21]:
# Get levels of DataFrame column MultiIndex
df.columns.levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [22]:
# Get levels of DataFrame column MultiIndex from subset of columns
# Note that all labels of all levels are returned, even if they are not present in the slice
# This avoids recomputing the levels to improve slicing performance
df[['foo', 'qux']].columns.levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [23]:
# Get labels of columns in DataFrame slice
df[['foo', 'qux']].columns.to_numpy()

array([('foo', 'one'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')],
      dtype=object)

In [24]:
# Get first-level values of columns in DataFrame slice
df[['foo', 'qux']].columns.get_level_values(0)

Index(['foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [25]:
# Reconstruct MultiIndex from labels present in slice
new_mi = df[['foo', 'qux']].columns.remove_unused_levels()
new_mi.levels

FrozenList([['foo', 'qux'], ['one', 'two']])

## Data alignment and using `reindex`

In [26]:
# Display original Series
s

bar  one    1.503375
     two   -0.306874
baz  one   -1.963563
     two   -0.607539
foo  one   -0.742961
     two    0.251203
qux  one   -0.559336
     two   -0.254505
dtype: float64

In [27]:
# Add subset of MultiIndex Series to MultiIndex Series
s + s[:-2]

bar  one    3.006750
     two   -0.613748
baz  one   -3.927125
     two   -1.215077
foo  one   -1.485923
     two    0.502406
qux  one         NaN
     two         NaN
dtype: float64

In [28]:
# Add every second element of MultiIndex Series to MultiIndex Series
s + s[::2]

bar  one    3.006750
     two         NaN
baz  one   -3.927125
     two         NaN
foo  one   -1.485923
     two         NaN
qux  one   -1.118671
     two         NaN
dtype: float64

In [29]:
# Reindex MultiIndex Series using another MultiIndex
s.reindex(index[:3])

first  second
bar    one       1.503375
       two      -0.306874
baz    one      -1.963563
dtype: float64

In [30]:
# Reindex MultiIndex Series using array of tuples
s.reindex([('foo', 'two'), ('bar', 'one'), ('qux', 'one'), ('baz', 'one')])
s

bar  one    1.503375
     two   -0.306874
baz  one   -1.963563
     two   -0.607539
foo  one   -0.742961
     two    0.251203
qux  one   -0.559336
     two   -0.254505
dtype: float64

# Advanced indexing with hierarchical index

In [31]:
# Transpose DataFrame
df = df.T
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,-1.115344,-1.972294,0.65868
bar,two,0.33626,-1.262359,1.955271
baz,one,0.241983,0.446274,1.278968
baz,two,1.302016,0.075196,-0.049237
foo,one,1.551178,0.371826,-0.433633
foo,two,0.651496,-0.69923,-1.82377
qux,one,-0.45674,-0.199597,-0.116647
qux,two,-0.287175,-0.335364,0.170029


In [32]:
# Select row of DataFrame with label ('bar', 'two')
df.loc[('bar', 'two')]

A    0.336260
B   -1.262359
C    1.955271
Name: (bar, two), dtype: float64

In [33]:
# Select element of DataFrame in row ('bar', 'two') and column 'A'
df.loc[('bar', 'two'), 'A']

0.33625974497978733

In [34]:
# Select all rows of DataFrame with label 'bar'
df.loc['bar'] 

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,-1.115344,-1.972294,0.65868
two,0.33626,-1.262359,1.955271


In [35]:
# Select all rows of DataFrame with labels from 'baz' to 'foo'
df.loc['baz':'foo']

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,one,0.241983,0.446274,1.278968
baz,two,1.302016,0.075196,-0.049237
foo,one,1.551178,0.371826,-0.433633
foo,two,0.651496,-0.69923,-1.82377


In [36]:
# Select all rows of DataFrame with labels from ('baz', 'two') to ('qux', 'one')
df.loc[('baz', 'two'):('qux', 'one')]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,1.302016,0.075196,-0.049237
foo,one,1.551178,0.371826,-0.433633
foo,two,0.651496,-0.69923,-1.82377
qux,one,-0.45674,-0.199597,-0.116647


In [37]:
# Select all rows of DataFrame with labels from ('baz', 'two') to 'foo'b
df.loc[('baz', 'two'):'foo']

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,1.302016,0.075196,-0.049237
foo,one,1.551178,0.371826,-0.433633
foo,two,0.651496,-0.69923,-1.82377


In [38]:
# Select rows of DataFrame with labels ('bar', 'two'), ('qux', 'one')
df.loc[[('bar', 'two'), ('qux', 'one')]]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,two,0.33626,-1.262359,1.955271
qux,one,-0.45674,-0.199597,-0.116647


In [39]:
# Create MultiIndex Series
s = pd.Series(
    [1, 2, 3, 4, 5, 6],
    index=pd.MultiIndex.from_product(
        [
            ['A', 'B'],
            ['c', 'd', 'e']
        ]
    )
)

s

A  c    1
   d    2
   e    3
B  c    4
   d    5
   e    6
dtype: int64

In [40]:
# Select elements of Series with labels ('A', 'c'), ('B', 'd')
s.loc[[('A', 'c'), ('B', 'd')]]

A  c    1
B  d    5
dtype: int64

In [41]:
# Select elements of Series where:
#  First level label is 'A' or 'B'
#  Second level label is 'c' or 'd'
s.loc[(['A', 'B'], ['c', 'd'])]

A  c    1
   d    2
B  c    4
   d    5
dtype: int64

## Using slicers

In [42]:
# Method for making labels
def make_label(prefix, n):
    return ["%s%s" % (prefix, i) for i in range(n)]

make_label("A", 4)

['A0', 'A1', 'A2', 'A3']

In [43]:
# Create DataFrame with MultiIndex index and MultiIndex columns
mi_index = pd.MultiIndex.from_product(
    [make_label('A', 4), make_label('B', 2), make_label('C', 4), make_label('D', 2)]
)

mi_columns = pd.MultiIndex.from_tuples(
    [('a', 'foo'), ('a', 'bar'), ('b', 'foo'), ('b', 'bah')],
    names=['lvl0', 'lvl1']
)

dfmi = pd.DataFrame(
    np.arange(len(mi_index) * len(mi_columns)).reshape((len(mi_index), len(mi_columns))),
    index=mi_index,
    columns=mi_columns
).sort_index().sort_index(axis=1)

dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237,236,239,238
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249,248,251,250


In [44]:
# Select slice of DataFrame using slices, lists, and labels
# Select rows:
#  From 'A1' to 'A3'
#  All rows
#  Rows 'C1' and 'C3'
# Select columns:
#  All columns
dfmi.loc[(slice('A1', 'A3'), slice(None), ['C1', 'C3']), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C3,D0,89,88,91,90
A1,B0,C3,D1,93,92,95,94
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C3,D0,121,120,123,122
A1,B1,C3,D1,125,124,127,126
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142


In [45]:
# Select slice of DataFrame using IndexSlice
# Select rows:
#  From 'A1' to 'A3'
#  All rows
#  Rows 'C1' and 'C3'
# Select columns:
#  All columns
idx = pd.IndexSlice
dfmi.loc[idx['A1':'A3', :, ['C1', 'C3']], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C3,D0,89,88,91,90
A1,B0,C3,D1,93,92,95,94
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C3,D0,121,120,123,122
A1,B1,C3,D1,125,124,127,126
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142


In [46]:
# Select rows:
#   First level: 'A1'
#   Second level: All
#   Third level: All
# Select columns:
#   First level: All
#   Second level: 'foo'
dfmi.loc['A1', (slice(None), 'foo')]

Unnamed: 0_level_0,Unnamed: 1_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,lvl1,foo,foo
B0,C0,D0,64,66
B0,C0,D1,68,70
B0,C1,D0,72,74
B0,C1,D1,76,78
B0,C2,D0,80,82
B0,C2,D1,84,86
B0,C3,D0,88,90
B0,C3,D1,92,94
B1,C0,D0,96,98
B1,C0,D1,100,102


In [47]:
# Select rows:
#   First level: All
#   Second level: All
#   Third level: 'C1' and 'C3'
# Select columns:
#   First level: All
#   Second level: 'foo'
dfmi.loc[idx[:, :, ['C1', 'C3']], idx[:, 'foo']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C3,D0,24,26
A0,B0,C3,D1,28,30
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C3,D0,56,58
A0,B1,C3,D1,60,62
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


In [48]:
# Find where elements of DataFrame in column ('a', 'foo') are greater than 200
mask = dfmi[('a', 'foo')] > 200
mask

A0  B0  C0  D0    False
            D1    False
        C1  D0    False
            D1    False
        C2  D0    False
                  ...  
A3  B1  C1  D1     True
        C2  D0     True
            D1     True
        C3  D0     True
            D1     True
Name: (a, foo), Length: 64, dtype: bool

In [49]:
# Select rows:
#   First level: Where mask is True
#   Second level: All
#   Third level: 'C1' and 'C3'
# Select columns:
#   First level: All
#   Second level: 'foo'
dfmi.loc[idx[mask, :, ['C1', 'C3']], idx[:, 'foo']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A3,B0,C1,D1,204,206
A3,B0,C3,D0,216,218
A3,B0,C3,D1,220,222
A3,B1,C1,D0,232,234
A3,B1,C1,D1,236,238
A3,B1,C3,D0,248,250
A3,B1,C3,D1,252,254


In [50]:
# Select rows:
#   First level: All
#   Second level: All
#   Third level: 'C1' and 'C3'
# Select columns:
#   First level: All
#   Second level: All
dfmi.loc(axis=0)[:, :, ['C1', 'C3']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C1,D0,41,40,43,42
A0,B1,C1,D1,45,44,47,46
A0,B1,C3,D0,57,56,59,58
A0,B1,C3,D1,61,60,63,62
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78


In [51]:
# Select all rows where lowest level is 'C1' or 'C3'
# Set elements in these rows to -10
df2 = dfmi.copy()
df2.loc(axis=0)[:, :, ['C1', 'C3']] = -10
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,-10,-10,-10,-10
A0,B0,C1,D1,-10,-10,-10,-10
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,-10,-10,-10,-10
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,-10,-10,-10,-10


In [52]:
# Select all rows where lowest level is 'C1' or 'C3'
# Set elements in these rows to their values * 1000
df2 = dfmi.copy()
df2.loc(axis=0)[:, :, ['C1', 'C3']] *= 1000
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9000,8000,11000,10000
A0,B0,C1,D1,13000,12000,15000,14000
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237000,236000,239000,238000
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249000,248000,251000,250000


## Cross-section

In [53]:
# Display original DataFrame
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,-1.115344,-1.972294,0.65868
bar,two,0.33626,-1.262359,1.955271
baz,one,0.241983,0.446274,1.278968
baz,two,1.302016,0.075196,-0.049237
foo,one,1.551178,0.371826,-0.433633
foo,two,0.651496,-0.69923,-1.82377
qux,one,-0.45674,-0.199597,-0.116647
qux,two,-0.287175,-0.335364,0.170029


In [54]:
# Take cross-section of DataFrame,
# selecting all rows where second level is 'one'
df.xs('one', level='second')

Unnamed: 0_level_0,A,B,C
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,-1.115344,-1.972294,0.65868
baz,0.241983,0.446274,1.278968
foo,1.551178,0.371826,-0.433633
qux,-0.45674,-0.199597,-0.116647


In [55]:
# Take cross-section of DataFrame using slicers,
# selecting all rows where second level is 'one'
df.loc[(slice(None), 'one'), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,-1.115344,-1.972294,0.65868
baz,one,0.241983,0.446274,1.278968
foo,one,1.551178,0.371826,-0.433633
qux,one,-0.45674,-0.199597,-0.116647


In [56]:
# Transpose DataFrame
df = df.T
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,-1.115344,0.33626,0.241983,1.302016,1.551178,0.651496,-0.45674,-0.287175
B,-1.972294,-1.262359,0.446274,0.075196,0.371826,-0.69923,-0.199597,-0.335364
C,0.65868,1.955271,1.278968,-0.049237,-0.433633,-1.82377,-0.116647,0.170029


In [57]:
# Take cross-section of DataFrame,
# selecting all columns where second level is 'one'
df.xs('one', level='second', axis=1)

first,bar,baz,foo,qux
A,-1.115344,0.241983,1.551178,-0.45674
B,-1.972294,0.446274,0.371826,-0.199597
C,0.65868,1.278968,-0.433633,-0.116647


In [58]:
# Take cross-section of DataFrame using slicers,
# selecting all columns where second level is 'one'
df.loc[:, (slice(None), 'one')]

first,bar,baz,foo,qux
second,one,one,one,one
A,-1.115344,0.241983,1.551178,-0.45674
B,-1.972294,0.446274,0.371826,-0.199597
C,0.65868,1.278968,-0.433633,-0.116647


In [59]:
# Take cross-section of DataFrame,
# selecting all columns where first is 'one' and second is 'bar'
df.xs(('one', 'bar'), level=('second', 'first'), axis=1)

first,bar
second,one
A,-1.115344
B,-1.972294
C,0.65868


In [60]:
# Take cross-section of DataFrame using slicers,
# selecting all columns where first is 'one' and second is 'bar'
df.loc[:, ('bar', 'one')]

A   -1.115344
B   -1.972294
C    0.658680
Name: (bar, one), dtype: float64

In [61]:
# Take cross-section of DataFrame,
# keeping the selected level
df.xs('one', level='second', axis=1, drop_level=False)

first,bar,baz,foo,qux
second,one,one,one,one
A,-1.115344,0.241983,1.551178,-0.45674
B,-1.972294,0.446274,0.371826,-0.199597
C,0.65868,1.278968,-0.433633,-0.116647


In [62]:
# Take cross-section of DataFrame,
# dropping the selected level
df.xs('one', level='second', axis=1, drop_level=True)

first,bar,baz,foo,qux
A,-1.115344,0.241983,1.551178,-0.45674
B,-1.972294,0.446274,0.371826,-0.199597
C,0.65868,1.278968,-0.433633,-0.116647


## Advanced reindexing and alignment

In [63]:
# Create MultiIndex DataFrame
m_idx = pd.MultiIndex(
    levels=[
        ['zero', 'one'],
        ['x', 'y']
    ],
    codes = [
        [1, 1, 0, 0],
        [1, 0, 1, 0]
    ]
)

df = pd.DataFrame(np.random.randn(4, 2), index=m_idx)
df

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.934628,1.225244
one,x,1.591822,-1.209922
zero,y,0.647496,1.326061
zero,x,1.024251,-0.749079


In [64]:
# Take mean of each DataFrame column at first index level
df2 = df.groupby(level=0).mean()
df2

Unnamed: 0,0,1
one,0.328597,0.007661
zero,0.835873,0.288491


In [65]:
# Reindex DataFrame to have original MultiIndex
df2.reindex(df.index, level=0)

Unnamed: 0,Unnamed: 1,0,1
one,y,0.328597,0.007661
one,x,0.328597,0.007661
zero,y,0.835873,0.288491
zero,x,0.835873,0.288491


In [66]:
# Align DataFrames to each other
df.align(df2, level=0)

(               0         1
 one  y -0.934628  1.225244
      x  1.591822 -1.209922
 zero y  0.647496  1.326061
      x  1.024251 -0.749079,
                0         1
 one  y  0.328597  0.007661
      x  0.328597  0.007661
 zero y  0.835873  0.288491
      x  0.835873  0.288491)

## Swapping levels with `swaplevel`

In [67]:
# Show original DataFrame
df

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.934628,1.225244
one,x,1.591822,-1.209922
zero,y,0.647496,1.326061
zero,x,1.024251,-0.749079


In [68]:
# Swap two MultiIndex levels
df.swaplevel(0, 1, axis=0)

Unnamed: 0,Unnamed: 1,0,1
y,one,-0.934628,1.225244
x,one,1.591822,-1.209922
y,zero,0.647496,1.326061
x,zero,1.024251,-0.749079


## Reordering levels with `reorder_levels`

In [69]:
# Permute MultiIndex levels
df.reorder_levels([1, 0], axis=0)

Unnamed: 0,Unnamed: 1,0,1
y,one,-0.934628,1.225244
x,one,1.591822,-1.209922
y,zero,0.647496,1.326061
x,zero,1.024251,-0.749079


## Renaming names of an `Index` or `MultiIndex`

In [70]:
# Rename columns of MultiIndex DataFrame
df.rename(columns={
    0: 'col0',
    1: 'col1'
})

Unnamed: 0,Unnamed: 1,col0,col1
one,y,-0.934628,1.225244
one,x,1.591822,-1.209922
zero,y,0.647496,1.326061
zero,x,1.024251,-0.749079


In [71]:
# Rename index labels of MultiIndex DataFrame
df.rename(index={
    'one': 'two',
    'y': 'z'
})

Unnamed: 0,Unnamed: 1,0,1
two,z,-0.934628,1.225244
two,x,1.591822,-1.209922
zero,z,0.647496,1.326061
zero,x,1.024251,-0.749079


In [72]:
# Rename MultiIndex levels
df.rename_axis(index=['abc', 'def'])

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
abc,def,Unnamed: 2_level_1,Unnamed: 3_level_1
one,y,-0.934628,1.225244
one,x,1.591822,-1.209922
zero,y,0.647496,1.326061
zero,x,1.024251,-0.749079


In [73]:
# Create named MultiIndex
mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y'])
mi

MultiIndex([(1, 'a'),
            (1, 'b'),
            (2, 'a'),
            (2, 'b')],
           names=['x', 'y'])

In [74]:
# Change MultiIndex level names
mi.set_names(['level1', 'level2'])

MultiIndex([(1, 'a'),
            (1, 'b'),
            (2, 'a'),
            (2, 'b')],
           names=['level1', 'level2'])

In [75]:
# Attempt to set name of MultiIndex via accessing name attribute of level
try:
    mi.levels[0].name = 'name'
except RuntimeError as e:
    print("RuntimeError:", e)

RuntimeError: Cannot set name on a level of a MultiIndex. Use 'MultiIndex.set_names' instead.


# Sorting a `MultiIndex`

In [76]:
# Create Series with randomized MultiIndex
import random
random.shuffle(tuples)

s = pd.Series(np.random.randn(8), index=pd.MultiIndex.from_tuples(tuples))
s

bar  two   -0.170724
baz  two   -1.146130
foo  one    0.160691
baz  one   -0.356309
qux  two   -1.692523
foo  two   -1.264434
bar  one   -0.838395
qux  one   -0.545450
dtype: float64

In [77]:
# Sort MultiIndex Series by index
s.sort_index()

bar  one   -0.838395
     two   -0.170724
baz  one   -0.356309
     two   -1.146130
foo  one    0.160691
     two   -1.264434
qux  one   -0.545450
     two   -1.692523
dtype: float64

In [78]:
# Sort MultiIndex Series by first level of index
s.sort_index(level=0)

bar  one   -0.838395
     two   -0.170724
baz  one   -0.356309
     two   -1.146130
foo  one    0.160691
     two   -1.264434
qux  one   -0.545450
     two   -1.692523
dtype: float64

In [79]:
# Sort MultiIndex Series by second level of index
s.sort_index(level=1)

bar  one   -0.838395
baz  one   -0.356309
foo  one    0.160691
qux  one   -0.545450
bar  two   -0.170724
baz  two   -1.146130
foo  two   -1.264434
qux  two   -1.692523
dtype: float64

In [80]:
# Set Series index to have names
s.index = s.index.set_names(['L1', 'L2'])
s

L1   L2 
bar  two   -0.170724
baz  two   -1.146130
foo  one    0.160691
baz  one   -0.356309
qux  two   -1.692523
foo  two   -1.264434
bar  one   -0.838395
qux  one   -0.545450
dtype: float64

In [81]:
# Sort MultiIndex Series by named index level
s.sort_index(level='L1')

L1   L2 
bar  one   -0.838395
     two   -0.170724
baz  one   -0.356309
     two   -1.146130
foo  one    0.160691
     two   -1.264434
qux  one   -0.545450
     two   -1.692523
dtype: float64

In [82]:
# Sort MultiIndex Series by named index level
s.sort_index(level='L2')

L1   L2 
bar  one   -0.838395
baz  one   -0.356309
foo  one    0.160691
qux  one   -0.545450
bar  two   -0.170724
baz  two   -1.146130
foo  two   -1.264434
qux  two   -1.692523
dtype: float64

In [83]:
# Sort DataFrame columns by second level of MultiIndex
df.T.sort_index(axis=1, level=1)

Unnamed: 0_level_0,one,zero,one,zero
Unnamed: 0_level_1,x,x,y,y
0,1.591822,1.024251,-0.934628,0.647496
1,-1.209922,-0.749079,1.225244,1.326061


In [84]:
# Create MultiIndex DataFrame with unsorted index
dfm = pd.DataFrame(
    {
        'jim': [0, 0, 1, 1],
        'joe': ['x', 'x', 'z', 'y'],
        'jolie': np.random.rand(4)
    }
).set_index(['jim', 'joe'])

dfm

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
0,x,0.263255
0,x,0.296632
1,z,0.28447
1,y,0.991997


In [85]:
# Select row at (1, 'z') from unsorted DataFrame
# Notice that this raises a PerformanceWarning
dfm.loc[(1, 'z')]

  dfm.loc[(1, 'z')]


Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
1,z,0.28447


In [86]:
# Selecct row at (1, 'z') after sorting DataFrame
# Notice that this does not raise a PerformanceWarning
dfm.sort_index().loc[(1, 'z')]

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
1,z,0.28447


In [87]:
# Check that unsorted DataFrame is still unsorted
dfm.index.is_monotonic_increasing | dfm.index.is_monotonic_decreasing

False

In [88]:
# Check that sorted DataFrame is sorted
dfm.sort_index().index.is_monotonic_increasing | dfm.sort_index().index.is_monotonic_decreasing

True

In [89]:
# Attempt to slice rows from unsorted DataFrame
try:
    dfm.loc[(0, 'y'):(1, 'z')]
except Exception as e:
    print(type(e), e)

<class 'pandas.errors.UnsortedIndexError'> 'Key length (2) was greater than MultiIndex lexsort depth (1)'


In [90]:
# Slice rows from sorted DataFrame
dfm.sort_index().loc[(0, 'y'):(1, 'z')]

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
1,y,0.991997
1,z,0.28447


# Take methods

In [91]:
# Create Index with random integer values
index = pd.Index(np.random.randint(0, 1000, 10))
index

Index([707, 328, 398, 215, 127, 817, 589, 671, 731, 232], dtype='int64')

In [92]:
# Select values of Index at given positions
positions = [0, 9, 3]
index[positions]

Index([707, 232, 215], dtype='int64')

In [93]:
# Select values of Index at given positions
index.take(positions)

Index([707, 232, 215], dtype='int64')

In [94]:
# Create Series of random values
s = pd.Series(np.random.randn(10))
s

0   -0.301921
1   -0.078756
2   -0.378944
3   -2.397877
4   -0.140336
5    0.573688
6    0.122450
7   -0.516384
8    1.425606
9    0.883182
dtype: float64

In [95]:
# Select values of Series at given positions
s.iloc[positions]

0   -0.301921
9    0.883182
3   -2.397877
dtype: float64

In [96]:
# Select values of Series at given positions
s.take(positions)

0   -0.301921
9    0.883182
3   -2.397877
dtype: float64

In [97]:
# Create DataFrame
frm = pd.DataFrame(np.random.randn(5, 3))
frm

Unnamed: 0,0,1,2
0,0.501186,-0.944419,1.061727
1,2.588887,0.23869,-0.169949
2,-0.251766,1.187411,-0.554698
3,0.15805,0.82783,-0.200147
4,-0.185473,-1.101875,0.715541


In [98]:
# Select rows at given positions
frm.take([1, 4, 3])

Unnamed: 0,0,1,2
1,2.588887,0.23869,-0.169949
4,-0.185473,-1.101875,0.715541
3,0.15805,0.82783,-0.200147


In [99]:
# Select columns at given positions
frm.take([0, 2], axis=1)

Unnamed: 0,0,2
0,0.501186,1.061727
1,2.588887,-0.169949
2,-0.251766,-0.554698
3,0.15805,-0.200147
4,-0.185473,0.715541


In [100]:
# Create array of random values
arr = np.random.randn(10)
arr

array([-0.40241643,  0.44475297,  0.10704868, -0.06260224,  0.12963254,
        0.11958566,  0.4140846 , -0.04663672,  0.67451183, -1.50986224])

In [101]:
# Select elements from array with Boolean array
# Note that this just selects the first element twice, then the second element twice
arr.take([False, False, True, True])

array([-0.40241643, -0.40241643,  0.44475297,  0.44475297])

In [102]:
# Select first two elements of array
arr[[0, 1]]

array([-0.40241643,  0.44475297])

In [103]:
# Create Series of random values
s = pd.Series(np.random.randn(10))
s

0   -1.443906
1    0.873049
2   -0.932214
3    0.100651
4    1.410027
5   -0.591636
6    1.506515
7   -0.823106
8   -1.703150
9    0.481357
dtype: float64

In [104]:
# Select elements from Series with Boolean array
# Note that this just selects the first element twice, then the second element twice
s.take([False, False, True, True])

0   -1.443906
0   -1.443906
1    0.873049
1    0.873049
dtype: float64

In [105]:
# Select first two elements of Series
s.iloc[[0, 1]]

0   -1.443906
1    0.873049
dtype: float64

In [106]:
# Compare performance of [] vs .take() for array
arr = np.random.randn(10000, 5)
indexer = np.arange(10000)
random.shuffle(indexer)

%timeit arr[indexer]
%timeit arr.take(indexer, axis=0)

120 μs ± 884 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
33.7 μs ± 854 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [107]:
# Compare performance of [] vs .take() for Series
s = pd.Series(np.random.randn(10000))
indexer = np.arange(10000)
random.shuffle(indexer)

%timeit s[indexer]
%timeit s.take(indexer)

462 μs ± 22.9 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
70 μs ± 1.73 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


# `Index` types

## `CategoricalIndex`

In [110]:
# Create DataFrame with categorical column
from pandas.api.types import CategoricalDtype

df = pd.DataFrame({
    'A': np.arange(6),
    'B': list('aabbca')
})

df['B'] = df['B'].astype(CategoricalDtype(list('cab')))

df

Unnamed: 0,A,B
0,0,a
1,1,a
2,2,b
3,3,b
4,4,c
5,5,a


In [111]:
# Show categories of categorical column
df['B'].cat.categories

Index(['c', 'a', 'b'], dtype='object')

In [112]:
# Use categorical column as index with repeated values
df2 = df.set_index('B')
df2

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
a,0
a,1
b,2
b,3
c,4
a,5


In [113]:
# Show categories of categorical index
df2.index

CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['c', 'a', 'b'], ordered=False, dtype='category', name='B')

In [114]:
# Select rows of DataFrame where index is 'a'
df2.loc['a']

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
a,0
a,1
a,5


In [None]:
# Sort DataFrame by index
# Note that this sorts by the order of the categories
df2.sort_index()

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
c,4
a,0
a,1
a,5
b,2
b,3


In [116]:
# Perform groupby operation DataFrame with categorical index
df2.groupby(level=0, observed=True).sum() 

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
c,4
a,6
b,5


In [118]:
# Show that index is still CategoricalIndex after groupby operation
df2.groupby(level=0, observed=True).sum().index

CategoricalIndex(['c', 'a', 'b'], categories=['c', 'a', 'b'], ordered=False, dtype='category', name='B')

In [119]:
# Create DataFrame with categorical column
df3 = pd.DataFrame({
    'A': np.arange(3),
    'B': pd.Series(list('abc')).astype('category')
})

df3

Unnamed: 0,A,B
0,0,a
1,1,b
2,2,c


In [120]:
# Make categorical column the DataFrame's index
df3 = df3.set_index('B')
df3

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
a,0
b,1
c,2


In [122]:
# Reindex DataFrame with list of non-categorical values
df3.reindex(['a', 'e'])

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
a,0.0
e,


In [123]:
# Show that reindexing with a list of non-categorical values turns the CategoricalIndex into an Index
df3.reindex(['a', 'e']).index

Index(['a', 'e'], dtype='object', name='B')

In [None]:
# Reindex DataFarme with a CategoricalIndex
df3.reindex(pd.CategoricalIndex(['a', 'e'], categories=list('abe')))

Unnamed: 0,A
a,0.0
e,


In [125]:
# Show that reindexing with a CategoricalIndex keeps the CategoricalIndex
df3.reindex(pd.CategoricalIndex(['a', 'e'], categories=list('abe'))).index

CategoricalIndex(['a', 'e'], categories=['a', 'b', 'e'], ordered=False, dtype='category')

In [126]:
# Create DataFrame with categorical column
# Use the categorical column as a CategoricalIndex
df4 = pd.DataFrame({
    'A': np.arange(2),
    'B': list('ba')
})

df4['B'] = df4['B'].astype(CategoricalDtype(list('ab')))
df4 = df4.set_index('B')

df4

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
b,0
a,1


In [127]:
# Create another DataFrame with a categorical column
# Use the categorical column as a CategoricalIndex
df5 = pd.DataFrame({
    'A': np.arange(2),
    'B': list('bc')
})

df5['B'] = df5['B'].astype(CategoricalDtype(list('bc')))
df5 = df5.set_index('B')

df5

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
b,0
c,1


In [None]:
# Concatenate two DataFrames with CategoricalIndex indices
pd.concat([df4, df5])

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
b,0
a,1
b,0
c,1


## `RangeIndex`

In [129]:
# Create a RangeIndex
idx = pd.RangeIndex(5)
idx

RangeIndex(start=0, stop=5, step=1)

In [130]:
# Create Series
ser = pd.Series([1, 2, 3])
ser

0    1
1    2
2    3
dtype: int64

In [131]:
# Show that default index of Series is RangeIndex
ser.index

RangeIndex(start=0, stop=3, step=1)

In [132]:
# Create DataFrame
df = pd.DataFrame([[1, 2], [3, 4]])
df

Unnamed: 0,0,1
0,1,2
1,3,4


In [133]:
# Show that default index of DataFrame is RangeIndex
df.index

RangeIndex(start=0, stop=2, step=1)

In [134]:
# Show that default column index of DataFrame is RangeIndex
df.columns

RangeIndex(start=0, stop=2, step=1)

## `IntervalIndex`

### Indexing with an `IntervalIndex`

In [135]:
# Create DataFrame with InvervalIndex
df = pd.DataFrame(
    {'A': [1, 2, 3, 4]},
    index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])
)

df

Unnamed: 0,A
"(0, 1]",1
"(1, 2]",2
"(2, 3]",3
"(3, 4]",4


In [None]:
# Select rows of DataFrame in the interval containing 2
df.loc[2]

A    2
Name: (1, 2], dtype: int64

In [None]:
# Select rows of DataFrame in the interval [2, 3]
df.loc[2:3]


Unnamed: 0,A
"(1, 2]",2
"(2, 3]",3


In [139]:
# Select rows of DataFrame in the interval containing 2.5
df.loc[2.5]

A    3
Name: (2, 3], dtype: int64

In [140]:
# Select rows of DataFrame in the interval [2.5, 3.5]
df.loc[2.5:3.5]

Unnamed: 0,A
"(2, 3]",3
"(3, 4]",4


In [None]:
# Select rows of DataFrame indexed by the interval (1, 2]
df.loc[pd.Interval(1, 2)]

A    2
Name: (1, 2], dtype: int64

In [142]:
# Attempt to select rows of DataFrame indexed by an interval that is not in the index
try:
    df.loc[pd.Interval(0.5, 2.5)]
except KeyError as e:
    print("KeyError:", e)

KeyError: Interval(0.5, 2.5, closed='right')


In [144]:
# Select all rows of DataFrame that overlap a given interval
df[df.index.overlaps(pd.Interval(0.5, 2.5))]

Unnamed: 0,A
"(0, 1]",1
"(1, 2]",2
"(2, 3]",3


### Binning data with `cut` and `qcut`

In [145]:
# Create bins
c = pd.cut(range(4), bins=2)
c

[(-0.003, 1.5], (-0.003, 1.5], (1.5, 3.0], (1.5, 3.0]]
Categories (2, interval[float64, right]): [(-0.003, 1.5] < (1.5, 3.0]]

In [146]:
# Show that bin categories are an IntervalIndex
c.categories

IntervalIndex([(-0.003, 1.5], (1.5, 3.0]], dtype='interval[float64, right]')

In [147]:
# Use cut to generate bins
# Bin new data using the same bins
c = pd.cut(range(4), bins=[0, 1, 2, 3])
pd.cut([0, 3, 5, 1], bins=c.categories)

[NaN, (2.0, 3.0], NaN, (0.0, 1.0]]
Categories (3, interval[int64, right]): [(0, 1] < (1, 2] < (2, 3]]

### Generating ranges of intervals

In [148]:
# Create IntervalIndex using start and end points
pd.interval_range(start=0, end=5)

IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], dtype='interval[int64, right]')

In [None]:
# Create IntervalIndex using start point and periods
pd.interval_range(start=pd.Timestamp('2024-11-27'), periods=4)

IntervalIndex([(2024-11-27 00:00:00, 2024-11-28 00:00:00],
               (2024-11-28 00:00:00, 2024-11-29 00:00:00],
               (2024-11-29 00:00:00, 2024-11-30 00:00:00],
               (2024-11-30 00:00:00, 2024-12-01 00:00:00]],
              dtype='interval[datetime64[ns], right]')

In [None]:
# Create IntervalIndex using end point and periods
pd.interval_range(end=pd.Timedelta('3 days'), periods=3)

IntervalIndex([(0 days 00:00:00, 1 days 00:00:00],
               (1 days 00:00:00, 2 days 00:00:00],
               (2 days 00:00:00, 3 days 00:00:00]],
              dtype='interval[timedelta64[ns], right]')

In [151]:
# Create IntervalIndex using start point, periods, and frequency
pd.interval_range(start=0, periods=5, freq=1.5)

IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0], (6.0, 7.5]], dtype='interval[float64, right]')

In [152]:
# Create IntervalIndex using start point, periods, and frequency
pd.interval_range(start=pd.Timestamp('2024-11-27'), periods=4, freq='W')

IntervalIndex([(2024-12-01 00:00:00, 2024-12-08 00:00:00],
               (2024-12-08 00:00:00, 2024-12-15 00:00:00],
               (2024-12-15 00:00:00, 2024-12-22 00:00:00],
               (2024-12-22 00:00:00, 2024-12-29 00:00:00]],
              dtype='interval[datetime64[ns], right]')

In [154]:
# Create IntervalIndex using start point, periods, and frequency
pd.interval_range(start=pd.Timedelta('0 days'), periods=3, freq='9h')

IntervalIndex([(0 days 00:00:00, 0 days 09:00:00],
               (0 days 09:00:00, 0 days 18:00:00],
               (0 days 18:00:00, 1 days 03:00:00]],
              dtype='interval[timedelta64[ns], right]')

In [155]:
# Create IntervalIndex using start and end points, and closed parameter
pd.interval_range(start=0, end=4, closed='both')

IntervalIndex([[0, 1], [1, 2], [2, 3], [3, 4]], dtype='interval[int64, both]')

In [156]:
# Create IntervalIndex using start and end points, and close parameter
pd.interval_range(start=0, end=4, closed='neither')

IntervalIndex([(0, 1), (1, 2), (2, 3), (3, 4)], dtype='interval[int64, neither]')

# Miscellaneous indexing FAQ

## Integer indexing

## Non-monotonic indexes require exact matches

## Endpoints are inclusive

## Indexing potentially changes underlying `Series` dtype