<a href="https://colab.research.google.com/github/Zain-khan/pandas.org/blob/main/Advanced_multiindexing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
arrays = [
    ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
    ["one", "two", "one", "two", "one", "two", "one", "two"],
]


tuples = pd.MultiIndex.from_tuples(zip(*arrays))
tuples = tuples.astype(object)
tuples

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

In [None]:
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])


index

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [None]:
s = pd.Series(np.random.randn(8), index=index)

s

first  second
bar    one      -0.047371
       two       0.435740
baz    one      -0.295517
       two      -0.000276
foo    one      -0.711663
       two      -0.076667
qux    one       0.966457
       two       0.958111
dtype: float64

In [None]:
iterables = [["bar", "baz", "foo", "qux"], ["one", "two"]]

pd.MultiIndex.from_product(iterables, names=["first", "second"])

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])

In [None]:
df = pd.DataFrame(
    [["bar", "one"], ["bar", "two"], ["foo", "one"], ["foo", "two"]],
    columns=["first", "second"],
)
df

Unnamed: 0,first,second
0,bar,one
1,bar,two
2,foo,one
3,foo,two


In [None]:
pd.MultiIndex.from_frame(df)

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('foo', 'one'),
            ('foo', 'two')],
           names=['first', 'second'])

In [None]:
arrays = [
    np.array(["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]),
    np.array(["one", "two", "one", "two", "one", "two", "one", "two"]),
]


s = pd.Series(np.random.randn(8), index=arrays)

s

bar  one    1.169503
     two    0.694424
baz  one   -1.042861
     two   -1.110104
foo  one    0.101914
     two   -0.231094
qux  one   -0.094677
     two   -0.410490
dtype: float64

In [None]:
df = pd.DataFrame(np.random.randn(8, 4), index=arrays)

df

Unnamed: 0,Unnamed: 1,0,1,2,3
bar,one,1.442535,-2.078586,0.546471,1.06113
bar,two,-0.060031,0.574026,2.354786,-2.282789
baz,one,0.505928,-0.721262,1.826192,-1.064124
baz,two,0.336131,-0.145381,-1.078362,-1.884117
foo,one,-0.838272,1.216057,-0.185691,-0.127518
foo,two,0.348157,-0.043586,-2.086752,0.621819
qux,one,-0.395454,0.467683,1.827621,-0.968587
qux,two,-0.599296,1.925229,0.202001,0.841316


In [None]:
df.index.names

FrozenList([None, None])

In [None]:
df = pd.DataFrame(np.random.randn(3, 8), index=["A", "B", "C"], columns=index)

df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.605983,0.048924,-1.192358,0.154942,0.093449,1.900251,0.433313,0.674233
B,0.616919,-0.701109,2.070453,-0.591316,-0.486282,0.613749,-0.738528,0.88512
C,2.683783,0.350273,0.830113,-0.083357,0.255827,1.628754,-1.109141,-0.567431


In [None]:
 pd.DataFrame(np.random.randn(6, 6), index=index[:6], columns=index[:6])

Unnamed: 0_level_0,first,bar,bar,baz,baz,foo,foo
Unnamed: 0_level_1,second,one,two,one,two,one,two
first,second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
bar,one,-0.741825,2.522883,0.053137,0.50591,0.102076,0.236779
bar,two,0.468774,0.001856,-0.074763,-0.06596,1.272004,-0.144814
baz,one,-0.685661,0.872562,-0.930669,-0.464303,0.988672,-0.83163
baz,two,1.59699,1.488785,-0.659213,0.002122,2.648957,1.108055
foo,one,2.792885,0.048546,-0.6569,0.502628,-0.771726,1.410001
foo,two,-1.377136,-1.436703,-1.165396,-1.900524,-0.036675,-0.502075


In [None]:
with pd.option_context("display.multi_sparse", False):df

In [None]:
pd.Series(np.random.randn(8), index=tuples)

bar  one    0.839176
     two   -0.504661
baz  one    0.974997
     two   -1.978226
foo  one    1.401278
     two   -0.529174
qux  one   -0.021957
     two   -1.006030
dtype: float64

In [None]:
index.get_level_values(0)

Index(['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [None]:
index.get_level_values("second")

Index(['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], dtype='object', name='second')

In [None]:
df["bar"]

second,one,two
A,0.605983,0.048924
B,0.616919,-0.701109
C,2.683783,0.350273


In [None]:
df["baz"]

second,one,two
A,-1.192358,0.154942
B,2.070453,-0.591316
C,0.830113,-0.083357


In [None]:
df["bar", "one"]

A    0.605983
B    0.616919
C    2.683783
Name: (bar, one), dtype: float64

In [None]:
df["bar", "two"]

A    0.048924
B   -0.701109
C    0.350273
Name: (bar, two), dtype: float64

In [None]:
df["bar"]["two"]

A    0.048924
B   -0.701109
C    0.350273
Name: two, dtype: float64

In [None]:
s["qux"]

one   -0.094677
two   -0.410490
dtype: float64

In [None]:
orignal_levels = df.columns.levels
orignal_levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [None]:
orignal_names = df.columns.names
orignal_names

FrozenList(['first', 'second'])

In [None]:
levels=df[["bar","foo"]].columns.levels

In [None]:
names=df[["bar", "qux"]].columns.names

In [None]:
new_df = pd.DataFrame(levels, names)
new_df

Unnamed: 0,0,1,2,3
first,bar,baz,foo,qux
second,one,two,,


In [None]:
sliced_df = pd.DataFrame(orignal_levels, orignal_names)
sliced_df

Unnamed: 0,0,1,2,3
first,bar,baz,foo,qux
second,one,two,,


In [None]:
df[["foo", "qux"]].columns.to_numpy()

array([('foo', 'one'), ('foo', 'two'), ('qux', 'one'), ('qux', 'two')],
      dtype=object)

In [None]:
df[["foo", "qux"]].columns.get_level_values(0)

Index(['foo', 'foo', 'qux', 'qux'], dtype='object', name='first')

In [None]:
new_mi = df[["foo", "qux"]].columns.remove_unused_levels()
new_mi.levels

FrozenList([['foo', 'qux'], ['one', 'two']])

In [None]:
s

bar  one    1.169503
     two    0.694424
baz  one   -1.042861
     two   -1.110104
foo  one    0.101914
     two   -0.231094
qux  one   -0.094677
     two   -0.410490
dtype: float64

In [None]:
s + s[:-2]

bar  one    2.339006
     two    1.388849
baz  one   -2.085722
     two   -2.220208
foo  one    0.203827
     two   -0.462188
qux  one         NaN
     two         NaN
dtype: float64

In [None]:
s.reindex(index[:3])

first  second
bar    one       1.169503
       two       0.694424
baz    one      -1.042861
dtype: float64

In [None]:
s.reindex([("foo", "two"), ("bar", "one"), ("qux", "one"), ("baz", "one")])

foo  two   -0.231094
bar  one    1.169503
qux  one   -0.094677
baz  one   -1.042861
dtype: float64

In [None]:
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.605983,0.048924,-1.192358,0.154942,0.093449,1.900251,0.433313,0.674233
B,0.616919,-0.701109,2.070453,-0.591316,-0.486282,0.613749,-0.738528,0.88512
C,2.683783,0.350273,0.830113,-0.083357,0.255827,1.628754,-1.109141,-0.567431


In [None]:
df = df.T

df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.605983,0.616919,2.683783
bar,two,0.048924,-0.701109,0.350273
baz,one,-1.192358,2.070453,0.830113
baz,two,0.154942,-0.591316,-0.083357
foo,one,0.093449,-0.486282,0.255827
foo,two,1.900251,0.613749,1.628754
qux,one,0.433313,-0.738528,-1.109141
qux,two,0.674233,0.88512,-0.567431


In [None]:
df.loc[("bar", "two")]

A    0.048924
B   -0.701109
C    0.350273
Name: (bar, two), dtype: float64

In [None]:
df.loc[("bar", "two"), "A"]

0.04892389406299117

In [None]:
df.loc["bar"]

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0.605983,0.616919,2.683783
two,0.048924,-0.701109,0.350273


In [None]:
df.loc["baz":"foo"]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,one,-1.192358,2.070453,0.830113
baz,two,0.154942,-0.591316,-0.083357
foo,one,0.093449,-0.486282,0.255827
foo,two,1.900251,0.613749,1.628754


In [None]:
df.loc[("baz", "two"):("qux", "one")]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,0.154942,-0.591316,-0.083357
foo,one,0.093449,-0.486282,0.255827
foo,two,1.900251,0.613749,1.628754
qux,one,0.433313,-0.738528,-1.109141


In [None]:
df.loc[("baz", "two"):"foo"]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
baz,two,0.154942,-0.591316,-0.083357
foo,one,0.093449,-0.486282,0.255827
foo,two,1.900251,0.613749,1.628754


In [None]:
df.loc[[("bar", "two"), ("qux", "one")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,two,0.048924,-0.701109,0.350273
qux,one,0.433313,-0.738528,-1.109141


In [None]:
s = pd.Series(
    [1, 2, 3, 4, 5, 6],
    index=pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]]),
)
s

A  c    1
   d    2
   e    3
B  c    4
   d    5
   e    6
dtype: int64

In [None]:
s.loc[[("A", "c"), ("B", "d")]]# list of tuples

A  c    1
B  d    5
dtype: int64

In [None]:
s.loc[(["A", "B"], ["c", "d"])]  # tuple of lists

A  c    1
   d    2
B  c    4
   d    5
dtype: int64

In [None]:
I =[1,2,3,4,5]
value = [x&1 for x in I]
print(value)

[1, 0, 1, 0, 1]


In [None]:
df.loc[(slice("A1", "A3"), ), :]  # noqa: E999

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [None]:
df.loc[(slice("A1", "A3"), )]  # noqa: E999

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1


In [None]:
def mklbl(prefix, n):
    return ["%s%s" % (prefix, i) for i in range(n)]

In [None]:
miindex = pd.MultiIndex.from_product(
    [mklbl("A", 4), mklbl("B", 2), mklbl("C", 4), mklbl("D", 2)]
)
miindex

MultiIndex([('A0', 'B0', 'C0', 'D0'),
            ('A0', 'B0', 'C0', 'D1'),
            ('A0', 'B0', 'C1', 'D0'),
            ('A0', 'B0', 'C1', 'D1'),
            ('A0', 'B0', 'C2', 'D0'),
            ('A0', 'B0', 'C2', 'D1'),
            ('A0', 'B0', 'C3', 'D0'),
            ('A0', 'B0', 'C3', 'D1'),
            ('A0', 'B1', 'C0', 'D0'),
            ('A0', 'B1', 'C0', 'D1'),
            ('A0', 'B1', 'C1', 'D0'),
            ('A0', 'B1', 'C1', 'D1'),
            ('A0', 'B1', 'C2', 'D0'),
            ('A0', 'B1', 'C2', 'D1'),
            ('A0', 'B1', 'C3', 'D0'),
            ('A0', 'B1', 'C3', 'D1'),
            ('A1', 'B0', 'C0', 'D0'),
            ('A1', 'B0', 'C0', 'D1'),
            ('A1', 'B0', 'C1', 'D0'),
            ('A1', 'B0', 'C1', 'D1'),
            ('A1', 'B0', 'C2', 'D0'),
            ('A1', 'B0', 'C2', 'D1'),
            ('A1', 'B0', 'C3', 'D0'),
            ('A1', 'B0', 'C3', 'D1'),
            ('A1', 'B1', 'C0', 'D0'),
            ('A1', 'B1', 'C0', 'D1'),
            

In [None]:
micolumns = pd.MultiIndex.from_tuples(
    [("a", "foo"), ("a", "bar"), ("b", "foo"), ("b", "bah")], names=["lvl0", "lvl1"]
)
micolumns

MultiIndex([('a', 'foo'),
            ('a', 'bar'),
            ('b', 'foo'),
            ('b', 'bah')],
           names=['lvl0', 'lvl1'])

In [None]:
dfmi = (
    pd.DataFrame(
        np.arange(len(miindex) * len(micolumns)).reshape(
            (len(miindex), len(micolumns))
        ),
        index=miindex,
        columns=micolumns,
    )
    .sort_index()
    .sort_index(axis=1)
)

In [None]:
dfmi

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237,236,239,238
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249,248,251,250


In [None]:
dfmi.loc[(slice("A1", "A3"), slice(None), ["C1", "C3"]), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78
A1,B0,C3,D0,89,88,91,90
A1,B0,C3,D1,93,92,95,94
A1,B1,C1,D0,105,104,107,106
A1,B1,C1,D1,109,108,111,110
A1,B1,C3,D0,121,120,123,122
A1,B1,C3,D1,125,124,127,126
A2,B0,C1,D0,137,136,139,138
A2,B0,C1,D1,141,140,143,142


In [None]:
idx = pd.IndexSlice

dfmi.loc[idx[:, :, ["C1", "C3"]], idx[:, "foo"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C3,D0,24,26
A0,B0,C3,D1,28,30
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C3,D0,56,58
A0,B1,C3,D1,60,62
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


In [None]:
dfmi.loc["A1", (slice(None), "foo")]

Unnamed: 0_level_0,Unnamed: 1_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,lvl1,foo,foo
B0,C0,D0,64,66
B0,C0,D1,68,70
B0,C1,D0,72,74
B0,C1,D1,76,78
B0,C2,D0,80,82
B0,C2,D1,84,86
B0,C3,D0,88,90
B0,C3,D1,92,94
B1,C0,D0,96,98
B1,C0,D1,100,102


In [None]:
dfmi.loc[idx[:, :, ["C1", "C3"]], idx[:, "foo"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A0,B0,C1,D0,8,10
A0,B0,C1,D1,12,14
A0,B0,C3,D0,24,26
A0,B0,C3,D1,28,30
A0,B1,C1,D0,40,42
A0,B1,C1,D1,44,46
A0,B1,C3,D0,56,58
A0,B1,C3,D1,60,62
A1,B0,C1,D0,72,74
A1,B0,C1,D1,76,78


In [None]:
mask = dfmi[("a", "foo")] > 200
dfmi.loc[idx[mask, :, ["C1", "C3"]], idx[:, "foo"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,foo,foo
A3,B0,C1,D1,204,206
A3,B0,C3,D0,216,218
A3,B0,C3,D1,220,222
A3,B1,C1,D0,232,234
A3,B1,C1,D1,236,238
A3,B1,C3,D0,248,250
A3,B1,C3,D1,252,254


In [None]:
dfmi.loc(axis=0)[:, :, ["C1", "C3"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C1,D0,41,40,43,42
A0,B1,C1,D1,45,44,47,46
A0,B1,C3,D0,57,56,59,58
A0,B1,C3,D1,61,60,63,62
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78


In [None]:
dfmi.loc[:, :, ["C1", "C3"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C1,D0,9,8,11,10
A0,B0,C1,D1,13,12,15,14
A0,B0,C3,D0,25,24,27,26
A0,B0,C3,D1,29,28,31,30
A0,B1,C1,D0,41,40,43,42
A0,B1,C1,D1,45,44,47,46
A0,B1,C3,D0,57,56,59,58
A0,B1,C3,D1,61,60,63,62
A1,B0,C1,D0,73,72,75,74
A1,B0,C1,D1,77,76,79,78


In [None]:
df2 = dfmi.copy()

df2.loc(axis=0)[:, :, ["C1", "C3"]] = -10

df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,-10,-10,-10,-10
A0,B0,C1,D1,-10,-10,-10,-10
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,-10,-10,-10,-10
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,-10,-10,-10,-10


In [None]:
df2 = dfmi.copy()

df2.loc[idx[:, :, ["C1", "C3"]], :] = df2 * 1000

df2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lvl0,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,lvl1,bar,foo,bah,foo
A0,B0,C0,D0,1,0,3,2
A0,B0,C0,D1,5,4,7,6
A0,B0,C1,D0,9000,8000,11000,10000
A0,B0,C1,D1,13000,12000,15000,14000
A0,B0,C2,D0,17,16,19,18
...,...,...,...,...,...,...,...
A3,B1,C1,D1,237000,236000,239000,238000
A3,B1,C2,D0,241,240,243,242
A3,B1,C2,D1,245,244,247,246
A3,B1,C3,D0,249000,248000,251000,250000


In [None]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.605983,0.616919,2.683783
bar,two,0.048924,-0.701109,0.350273
baz,one,-1.192358,2.070453,0.830113
baz,two,0.154942,-0.591316,-0.083357
foo,one,0.093449,-0.486282,0.255827
foo,two,1.900251,0.613749,1.628754
qux,one,0.433313,-0.738528,-1.109141
qux,two,0.674233,0.88512,-0.567431


In [None]:
df.xs("one", level="second")

Unnamed: 0_level_0,A,B,C
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,0.605983,0.616919,2.683783
baz,-1.192358,2.070453,0.830113
foo,0.093449,-0.486282,0.255827
qux,0.433313,-0.738528,-1.109141


In [None]:
df.xs("bar", level="first")

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0.605983,0.616919,2.683783
two,0.048924,-0.701109,0.350273


In [None]:
df.loc[(slice(None), "one"), :]

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.605983,0.616919,2.683783
baz,one,-1.192358,2.070453,0.830113
foo,one,0.093449,-0.486282,0.255827
qux,one,0.433313,-0.738528,-1.109141


In [None]:
df = df.T

In [None]:
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.605983,0.048924,-1.192358,0.154942,0.093449,1.900251,0.433313,0.674233
B,0.616919,-0.701109,2.070453,-0.591316,-0.486282,0.613749,-0.738528,0.88512
C,2.683783,0.350273,0.830113,-0.083357,0.255827,1.628754,-1.109141,-0.567431


In [None]:
df =df.T

In [None]:
df.xs("one", level="second", axis=0)

Unnamed: 0_level_0,A,B,C
first,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,0.605983,0.616919,2.683783
baz,-1.192358,2.070453,0.830113
foo,0.093449,-0.486282,0.255827
qux,0.433313,-0.738528,-1.109141


In [None]:
df.xs("bar", level="first", axis=0)

Unnamed: 0_level_0,A,B,C
second,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0.605983,0.616919,2.683783
two,0.048924,-0.701109,0.350273


In [None]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.605983,0.616919,2.683783
bar,two,0.048924,-0.701109,0.350273
baz,one,-1.192358,2.070453,0.830113
baz,two,0.154942,-0.591316,-0.083357
foo,one,0.093449,-0.486282,0.255827
foo,two,1.900251,0.613749,1.628754
qux,one,0.433313,-0.738528,-1.109141
qux,two,0.674233,0.88512,-0.567431


In [None]:
row_levels = df.index.levels
row_levels

FrozenList([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])

In [None]:
column_levels = df.columns.nlevels
column_levels

1

In [None]:
row_level_names = df.index.names
row_level_names

FrozenList(['first', 'second'])

In [None]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B,C
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,0.605983,0.616919,2.683783
bar,two,0.048924,-0.701109,0.350273
baz,one,-1.192358,2.070453,0.830113
baz,two,0.154942,-0.591316,-0.083357
foo,one,0.093449,-0.486282,0.255827
foo,two,1.900251,0.613749,1.628754
qux,one,0.433313,-0.738528,-1.109141
qux,two,0.674233,0.88512,-0.567431


In [None]:
df=df.T

In [None]:
df

first,bar,bar,baz,baz,foo,foo,qux,qux
second,one,two,one,two,one,two,one,two
A,0.605983,0.048924,-1.192358,0.154942,0.093449,1.900251,0.433313,0.674233
B,0.616919,-0.701109,2.070453,-0.591316,-0.486282,0.613749,-0.738528,0.88512
C,2.683783,0.350273,0.830113,-0.083357,0.255827,1.628754,-1.109141,-0.567431


In [None]:
df.loc[:, (slice(None), "one")]

first,bar,baz,foo,qux
second,one,one,one,one
A,0.605983,-1.192358,0.093449,0.433313
B,0.616919,2.070453,-0.486282,-0.738528
C,2.683783,0.830113,0.255827,-1.109141


In [None]:
df.xs(("one", "bar"), level=("second", "first"), axis=1)

first,bar
second,one
A,0.605983
B,0.616919
C,2.683783


In [None]:
df.loc[:, ("bar", "one")]

A    0.605983
B    0.616919
C    2.683783
Name: (bar, one), dtype: float64

In [None]:
df.xs("one", level="second", axis=1, drop_level=False)

first,bar,baz,foo,qux
second,one,one,one,one
A,0.605983,-1.192358,0.093449,0.433313
B,0.616919,2.070453,-0.486282,-0.738528
C,2.683783,0.830113,0.255827,-1.109141


In [None]:
df.xs("one", level="second", axis=1, drop_level=True)

first,bar,baz,foo,qux
A,0.605983,-1.192358,0.093449,0.433313
B,0.616919,2.070453,-0.486282,-0.738528
C,2.683783,0.830113,0.255827,-1.109141


In [None]:
midx = pd.MultiIndex(
    levels=[["zero", "one"], ["x", "y"]], codes=[[1, 1, 0, 0], [1, 0, 1, 0]]
)
midx

MultiIndex([( 'one', 'y'),
            ( 'one', 'x'),
            ('zero', 'y'),
            ('zero', 'x')],
           )

In [None]:
df = pd.DataFrame(np.random.randn(4, 2), index=midx)

df

Unnamed: 0,Unnamed: 1,0,1
one,y,-1.511186,-0.37027
one,x,1.113824,-1.213689
zero,y,-0.499417,-1.514281
zero,x,-2.041222,1.760088


In [None]:
df2 = df.groupby(level=0).mean()

df2

Unnamed: 0,0,1
one,-0.198681,-0.79198
zero,-1.27032,0.122903


In [None]:
df2.reindex(df.index, level=0)

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.198681,-0.79198
one,x,-0.198681,-0.79198
zero,y,-1.27032,0.122903
zero,x,-1.27032,0.122903


In [None]:
df_aligned, df2_aligned = df.align(df2, level=0)

df_aligned

Unnamed: 0,Unnamed: 1,0,1
one,y,-1.511186,-0.37027
one,x,1.113824,-1.213689
zero,y,-0.499417,-1.514281
zero,x,-2.041222,1.760088


In [None]:
df2_aligned

Unnamed: 0,Unnamed: 1,0,1
one,y,-0.198681,-0.79198
one,x,-0.198681,-0.79198
zero,y,-1.27032,0.122903
zero,x,-1.27032,0.122903


In [None]:
df[:5]

Unnamed: 0,Unnamed: 1,0,1
one,y,-1.511186,-0.37027
one,x,1.113824,-1.213689
zero,y,-0.499417,-1.514281
zero,x,-2.041222,1.760088


In [None]:
df[:5].swaplevel(0, 1, axis=0)

Unnamed: 0,Unnamed: 1,0,1
y,one,-1.511186,-0.37027
x,one,1.113824,-1.213689
y,zero,-0.499417,-1.514281
x,zero,-2.041222,1.760088


In [None]:
df[:5].reorder_levels([1, 0], axis=0)

Unnamed: 0,Unnamed: 1,0,1
y,one,-1.511186,-0.37027
x,one,1.113824,-1.213689
y,zero,-0.499417,-1.514281
x,zero,-2.041222,1.760088


In [None]:
df.rename(columns={0: "col0", 1: "col1"})

Unnamed: 0,Unnamed: 1,col0,col1
one,y,-1.511186,-0.37027
one,x,1.113824,-1.213689
zero,y,-0.499417,-1.514281
zero,x,-2.041222,1.760088


In [None]:
df.rename(index={"one": "two", "y": "z"})

Unnamed: 0,Unnamed: 1,0,1
two,z,-1.511186,-0.37027
two,x,1.113824,-1.213689
zero,z,-0.499417,-1.514281
zero,x,-2.041222,1.760088


In [None]:
df.rename_axis(index=["abc", "def"])

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1
abc,def,Unnamed: 2_level_1,Unnamed: 3_level_1
one,y,-1.511186,-0.37027
one,x,1.113824,-1.213689
zero,y,-0.499417,-1.514281
zero,x,-2.041222,1.760088


In [None]:
df.rename_axis(columns="Cols").columns

RangeIndex(start=0, stop=2, step=1, name='Cols')

In [None]:
df

Unnamed: 0,Unnamed: 1,0,1
one,y,-1.511186,-0.37027
one,x,1.113824,-1.213689
zero,y,-0.499417,-1.514281
zero,x,-2.041222,1.760088


In [None]:
mi = pd.MultiIndex.from_product([[1, 2], ["a", "b"]], names=["x", "y"])

mi.names

FrozenList(['x', 'y'])

In [None]:
mi

MultiIndex([(1, 'a'),
            (1, 'b'),
            (2, 'a'),
            (2, 'b')],
           names=['x', 'y'])

In [None]:
mi2 = mi.rename("new name", level=0)

mi2

MultiIndex([(1, 'a'),
            (1, 'b'),
            (2, 'a'),
            (2, 'b')],
           names=['new name', 'y'])

In [None]:
mi.set_names(levels[1]== "name")

MultiIndex([(1, 'a'),
            (1, 'b'),
            (2, 'a'),
            (2, 'b')],
           names=[False, False])

In [None]:
mi.set_names("C", level=1)

MultiIndex([(1, 'a'),
            (1, 'b'),
            (2, 'a'),
            (2, 'b')],
           names=['x', 'C'])

In [None]:
tuples

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

In [None]:


tuples = [('foo', 'one'), ('bar', 'one'), ('baz', 'one'), ('qux', 'two'),
          ('foo', 'two'), ('bar', 'one'), ('baz', 'one'), ('qux', 'two')]
tuples

[('foo', 'one'),
 ('bar', 'one'),
 ('baz', 'one'),
 ('qux', 'two'),
 ('foo', 'two'),
 ('bar', 'one'),
 ('baz', 'one'),
 ('qux', 'two')]

In [None]:
import random

random.shuffle(tuples)

s = pd.Series(np.random.randn(8), index=pd.MultiIndex.from_tuples(tuples))

s

foo  one   -1.906020
bar  one    2.489909
qux  two    0.062620
foo  two   -0.749303
bar  one   -2.133424
baz  one   -0.599519
     one   -1.440946
qux  two   -1.588501
dtype: float64

In [None]:
s.sort_index()

bar  one    2.489909
     one   -2.133424
baz  one   -0.599519
     one   -1.440946
foo  one   -1.906020
     two   -0.749303
qux  two    0.062620
     two   -1.588501
dtype: float64

In [None]:
s.sort_index(level=0)

bar  one    2.489909
     one   -2.133424
baz  one   -0.599519
     one   -1.440946
foo  one   -1.906020
     two   -0.749303
qux  two    0.062620
     two   -1.588501
dtype: float64

In [None]:
s.sort_index(level=1)

bar  one    2.489909
     one   -2.133424
baz  one   -0.599519
     one   -1.440946
foo  one   -1.906020
     two   -0.749303
qux  two    0.062620
     two   -1.588501
dtype: float64

In [None]:
s.index = s.index.set_names(["L1", "L2"])

In [None]:
s.sort_index(level="L1")

L1   L2 
bar  one    2.489909
     one   -2.133424
baz  one   -0.599519
     one   -1.440946
foo  one   -1.906020
     two   -0.749303
qux  two    0.062620
     two   -1.588501
dtype: float64

In [None]:
s.sort_index(level="L2")

L1   L2 
bar  one    2.489909
     one   -2.133424
baz  one   -0.599519
     one   -1.440946
foo  one   -1.906020
     two   -0.749303
qux  two    0.062620
     two   -1.588501
dtype: float64

In [None]:
df.T.sort_index(level=1, axis=1)

Unnamed: 0_level_0,one,zero,one,zero
Unnamed: 0_level_1,x,x,y,y
0,1.113824,-2.041222,-1.511186,-0.499417
1,-1.213689,1.760088,-0.37027,-1.514281


In [None]:
dfm = pd.DataFrame(
    {"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"], "jolie": np.random.rand(4)}
)
dfm

Unnamed: 0,jim,joe,jolie
0,0,x,0.613013
1,0,x,0.430864
2,1,z,0.039586
3,1,y,0.345307


In [None]:

dfm = dfm.set_index(["jim", "joe"])

dfm

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
0,x,0.613013
0,x,0.430864
1,z,0.039586
1,y,0.345307


In [None]:
dfm.loc[(1, 'z')]

  dfm.loc[(1, 'z')]


Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
1,z,0.039586


In [None]:
dfm.loc[(0, 'y'):(1, 'z')]

UnsortedIndexError: ignored

In [None]:
dfm.index.is_monotonic_increasing

False

In [None]:
dfm = dfm.sort_index()

dfm

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
0,x,0.613013
0,x,0.430864
1,y,0.345307
1,z,0.039586


In [None]:
dfm.index.is_monotonic_increasing

True

In [None]:
dfm.loc[(0, "y"):(1, "z")]

Unnamed: 0_level_0,Unnamed: 1_level_0,jolie
jim,joe,Unnamed: 2_level_1
1,y,0.345307
1,z,0.039586


In [None]:
index = pd.Index(np.random.randint(0, 1000, 10))

index

Int64Index([224, 823, 683, 774, 148, 32, 189, 107, 8, 643], dtype='int64')

In [None]:
positions = [0, 9, 3]

In [None]:
def my_fun(x,y):
    if y ==0:
       return 1
    else:
       return x* my_fun(x,y-1)
print (my_fun(2,4))

16


In [None]:
index[positions]

Int64Index([224, 643, 774], dtype='int64')

In [None]:
index.take(positions)

Int64Index([224, 643, 774], dtype='int64')

In [None]:
ser = pd.Series(np.random.randn(10))

ser.iloc[positions]

0    1.255160
9    0.597376
3    0.169687
dtype: float64

In [None]:
ser.take(positions)

0    1.255160
9    0.597376
3    0.169687
dtype: float64

In [None]:
frm = pd.DataFrame(np.random.randn(5, 3))
frm

Unnamed: 0,0,1,2
0,1.349281,0.534777,0.174138
1,1.802789,0.053396,0.087063
2,-0.009335,-0.423402,1.313322
3,3.184221,0.13239,1.253886
4,-1.701449,-1.093476,0.14336


In [None]:
frm.take([1, 4, 3])

Unnamed: 0,0,1,2
1,1.802789,0.053396,0.087063
4,-1.701449,-1.093476,0.14336
3,3.184221,0.13239,1.253886


In [None]:
frm.take([0, 2], axis=1)

Unnamed: 0,0,2
0,1.349281,0.174138
1,1.802789,0.087063
2,-0.009335,1.313322
3,3.184221,1.253886
4,-1.701449,0.14336


In [None]:
def my_fun(a, b, c):
    if a > b:
        if a > c:
           return a
    elif b > c:
          return b
    else:
         return c
print (my_fun(10,5,2))

10


In [None]:
arr = np.random.randn(10)

In [None]:
arr.take([False, False, True, True])

array([ 0.88472606,  0.88472606, -0.16190803, -0.16190803])

In [None]:
arr[[0, 1]]

array([ 0.88472606, -0.16190803])

In [None]:
ser = pd.Series(np.random.randn(10))

In [None]:

ser.take([False, False, True, True])

0   -1.107152
0   -1.107152
1    1.759012
1    1.759012
dtype: float64

In [None]:
ser.iloc[[0, 1]]

0   -1.107152
1    1.759012
dtype: float64

In [None]:
arr = np.random.randn(10000, 5)
arr

array([[-0.96177082,  1.29192998, -0.14930845,  0.34693051, -1.24120991],
       [-0.78574132,  1.25718042,  0.43100796, -0.1470033 , -3.01325125],
       [-0.37185595, -0.0699959 , -0.88640085, -0.06757875,  1.31405268],
       ...,
       [ 0.44690066,  0.43312999, -1.33683412,  1.85639686,  0.71831837],
       [-0.25500962, -0.32807963,  0.64220645, -0.03867111,  0.82753641],
       [-0.11245027, -0.69721057,  0.1772039 , -2.04423308,  0.53635849]])

In [None]:
indexer = np.arange(10000)
indexer

array([   0,    1,    2, ..., 9997, 9998, 9999])

In [None]:
random.shuffle(indexer)

In [None]:
%timeit arr[indexer]

357 µs ± 6.57 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
%timeit arr.take(indexer, axis=0)

58.6 µs ± 1.53 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
ser = pd.Series(arr[:, 0])
ser

0      -0.961771
1      -0.785741
2      -0.371856
3      -0.031850
4       0.239589
          ...   
9995    1.154184
9996   -1.785873
9997    0.446901
9998   -0.255010
9999   -0.112450
Length: 10000, dtype: float64

In [None]:
%timeit ser.iloc[indexer]

193 µs ± 43.8 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
%timeit ser.take(indexer)

178 µs ± 35.8 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
from pandas.api.types import CategoricalDtype

df = pd.DataFrame({"A": np.arange(6), "B": list("aabbca")})
df

Unnamed: 0,A,B
0,0,a
1,1,a
2,2,b
3,3,b
4,4,c
5,5,a


In [None]:

df["B"] = df["B"].astype(CategoricalDtype(list("cab")))
df['B']

0    a
1    a
2    b
3    b
4    c
5    a
Name: B, dtype: category
Categories (3, object): ['c', 'a', 'b']

In [None]:
df.dtypes

A       int64
B    category
dtype: object

In [None]:
df["B"].cat.categories

Index(['c', 'a', 'b'], dtype='object')

In [None]:
df2 = df.set_index("B")

df2.index

CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['c', 'a', 'b'], ordered=False, dtype='category', name='B')

In [None]:
df2.loc["a"]

In [None]:
df2.loc["a"].index

In [None]:
df2.sort_index()

In [None]:
df2.groupby(level=0).sum()

In [None]:
df2.groupby(level=0).sum().index

In [None]:
df3 = pd.DataFrame(
    {"A": np.arange(3), "B": pd.Series(list("abc")).astype("category")}
)
df3

In [None]:
df3 = df3.set_index("B")

df3

In [None]:
df3.reindex(["a", "e"])

In [None]:
df3.reindex(["a", "e"]).index

Index(['a', 'e'], dtype='object', name='B')

In [None]:
df3.reindex(pd.Categorical(["a", "e"], categories=list("abe")))

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
a,0.0
e,


In [None]:
df3.index

CategoricalIndex(['a', 'b', 'c'], categories=['a', 'b', 'c'], ordered=False, dtype='category', name='B')

In [None]:
df3.reindex(pd.Categorical(["a", "e"], categories=list("abe"))).index

CategoricalIndex(['a', 'e'], categories=['a', 'b', 'e'], ordered=False, dtype='category', name='B')

In [None]:
df4 = pd.DataFrame({"A": np.arange(2), "B": list("ba")})

In [None]:
df4["B"] = df4["B"].astype(CategoricalDtype(list("ab")))

In [None]:
df4 = df4.set_index("B")

In [None]:
df4.index

CategoricalIndex(['b', 'a'], categories=['a', 'b'], ordered=False, dtype='category', name='B')

In [None]:
df5 = pd.DataFrame({"A": np.arange(2), "B": list("bc")})

In [None]:
df5["B"] = df5["B"].astype(CategoricalDtype(list("bc")))

In [None]:
df5 = df5.set_index("B")

In [None]:
df5.index

CategoricalIndex(['b', 'c'], categories=['b', 'c'], ordered=False, dtype='category', name='B')

In [None]:
df4

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
b,0
a,1


In [None]:
df5

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
b,0
c,1


In [None]:
pd.concat([df4, df5])

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
b,0
a,1
b,0
c,1


In [None]:
idx = pd.RangeIndex(5)

idx

RangeIndex(start=0, stop=5, step=1)

In [None]:
ser = pd.Series([1, 2, 3])

ser.index

RangeIndex(start=0, stop=3, step=1)

In [None]:
df = pd.DataFrame([[1, 2], [3, 4]])

df.index

RangeIndex(start=0, stop=2, step=1)

In [None]:
df.columns

RangeIndex(start=0, stop=2, step=1)

In [None]:
idx[[0, 2]]

Int64Index([0, 2], dtype='int64')

In [None]:
df = pd.DataFrame(
    {"A": [1, 2, 3, 4]}, index=pd.IntervalIndex.from_breaks([0, 1, 2, 3, 4])
)


df

Unnamed: 0,A
"(0, 1]",1
"(1, 2]",2
"(2, 3]",3
"(3, 4]",4


In [None]:
df.loc[2]

A    2
Name: (1, 2], dtype: int64

In [None]:
df.loc[[2, 3]]

Unnamed: 0,A
"(1, 2]",2
"(2, 3]",3


In [None]:
df.loc[2.4]

A    3
Name: (2, 3], dtype: int64

In [None]:
df.loc[[2.5, 3.5]]

Unnamed: 0,A
"(2, 3]",3
"(3, 4]",4


In [None]:
df.loc[pd.Interval(1, 2)]

A    2
Name: (1, 2], dtype: int64

In [None]:
df.loc[pd.Interval(2, 3)]

A    3
Name: (2, 3], dtype: int64

In [None]:
idxr = df.index.overlaps(pd.Interval(0.5, 2.5))

idxr


df[idxr]

Unnamed: 0,A
"(0, 1]",1
"(1, 2]",2
"(2, 3]",3


In [None]:
c = pd.cut(range(4), bins=2)

c




c.categories

IntervalIndex([(-0.003, 1.5], (1.5, 3.0]], dtype='interval[float64, right]')

In [None]:
pd.cut([0, 3, 5, 1], bins=c.categories)

[(-0.003, 1.5], (1.5, 3.0], NaN, (-0.003, 1.5]]
Categories (2, interval[float64, right]): [(-0.003, 1.5] < (1.5, 3.0]]

In [None]:
pd.interval_range(start=0, end=5)

IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], dtype='interval[int64, right]')

In [None]:
pd.interval_range(start=pd.Timestamp("2017-01-01"), periods=4)

IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], (2017-01-03, 2017-01-04], (2017-01-04, 2017-01-05]], dtype='interval[datetime64[ns], right]')

In [None]:
pd.interval_range(end=pd.Timedelta("3 days"), periods=3)

IntervalIndex([(0 days 00:00:00, 1 days 00:00:00], (1 days 00:00:00, 2 days 00:00:00], (2 days 00:00:00, 3 days 00:00:00]], dtype='interval[timedelta64[ns], right]')

In [None]:
pd.interval_range(start=0, periods=5, freq=1.5)

IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0], (6.0, 7.5]], dtype='interval[float64, right]')

In [None]:
pd.interval_range(start=pd.Timestamp("2017-01-01"), periods=4, freq="W")

IntervalIndex([(2017-01-01, 2017-01-08], (2017-01-08, 2017-01-15], (2017-01-15, 2017-01-22], (2017-01-22, 2017-01-29]], dtype='interval[datetime64[ns], right]')

In [None]:
pd.interval_range(start=pd.Timedelta("0 days"), periods=3, freq="9H")

IntervalIndex([(0 days 00:00:00, 0 days 09:00:00], (0 days 09:00:00, 0 days 18:00:00], (0 days 18:00:00, 1 days 03:00:00]], dtype='interval[timedelta64[ns], right]')

In [None]:
pd.interval_range(start=0, end=4, closed="both")

IntervalIndex([[0, 1], [1, 2], [2, 3], [3, 4]], dtype='interval[int64, both]')

In [None]:
pd.interval_range(start=0, end=4, closed="neither")

IntervalIndex([(0, 1), (1, 2), (2, 3), (3, 4)], dtype='interval[int64, neither]')

In [None]:
pd.interval_range(start=0, end=6, periods=4)

IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], dtype='interval[float64, right]')

In [None]:
s = pd.Series(range(5))

s[-1]

KeyError: ignored

In [None]:
df = pd.DataFrame(np.random.randn(5, 4))

df








df.loc[-2:]

Unnamed: 0,0,1,2,3
0,1.901633,1.296948,1.439141,0.631838
1,0.016967,-0.729545,0.638466,0.796695
2,0.590148,0.575638,-1.447507,0.338873
3,0.029279,0.554699,2.329684,0.299497
4,-0.202384,0.835021,0.015652,0.407807


In [None]:
df = pd.DataFrame(index=[2, 3, 3, 4, 5], columns=["data"], data=list(range(5)))
df

Unnamed: 0,data
2,0
3,1
3,2
4,3
5,4


In [None]:
df.index.is_monotonic_increasing

True

In [None]:

df.loc[0:4, :]

Unnamed: 0,data
2,0
3,1
3,2
4,3


In [None]:
df.loc[13:15, :]

Unnamed: 0,data


In [None]:
df = pd.DataFrame(index=[2, 3, 1, 4, 3, 5], columns=["data"], data=list(range(6)))

df.index.is_monotonic_increasing

False

In [None]:
df.loc[2:4, :]

Unnamed: 0,data
2,0
3,1
1,2
4,3


In [None]:
df.loc[0:4, :]

KeyError: ignored

In [None]:
df.loc[2:3, :]

KeyError: ignored

In [None]:
s = pd.Series(np.random.randn(6), index=list("abcdef"))

s

a   -0.267063
b   -0.085546
c   -0.036923
d    0.022569
e    1.205395
f    1.881749
dtype: float64

In [None]:
s[2:5]

c   -0.036923
d    0.022569
e    1.205395
dtype: float64

In [215]:
s.loc['c':'e' + 1]

TypeError: ignored

In [216]:
s.loc["c":"e"]

c   -0.036923
d    0.022569
e    1.205395
dtype: float64

In [217]:
series1 = pd.Series([1, 2, 3])

series1.dtype

dtype('int64')

In [218]:
res = series1.reindex([0, 4])

res.dtype

dtype('float64')

In [219]:
res

0    1.0
4    NaN
dtype: float64

In [220]:
series2 = pd.Series([True])

series2.dtype

dtype('bool')

In [221]:
res = series2.reindex_like(series1)

res.dtype

dtype('O')

In [222]:
res

0    True
1     NaN
2     NaN
dtype: object