# MultiLindexing

In [2]:
import pandas as pd
from pandas import DataFrame, Series
import numpy as np

In [3]:
df = DataFrame({'row': [0, 1, 2],
                'One_X': [1.1] * 3,
                'One_Y': [1.2] * 3,
                'Two_X': [1.11] * 3,
                'Two_Y': [1.22] * 3}) ; df

Unnamed: 0,One_X,One_Y,Two_X,Two_Y,row
0,1.1,1.2,1.11,1.22,0
1,1.1,1.2,1.11,1.22,1
2,1.1,1.2,1.11,1.22,2


In [4]:
df = df.set_index('row') ;df

Unnamed: 0_level_0,One_X,One_Y,Two_X,Two_Y
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1.1,1.2,1.11,1.22
1,1.1,1.2,1.11,1.22
2,1.1,1.2,1.11,1.22


In [5]:
df.columns = pd.MultiIndex.from_tuples([tuple(c.split('_')) for c in df.columns]); df

Unnamed: 0_level_0,One,One,Two,Two
Unnamed: 0_level_1,X,Y,X,Y
row,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
0,1.1,1.2,1.11,1.22
1,1.1,1.2,1.11,1.22
2,1.1,1.2,1.11,1.22


In [6]:
df_st = df.stack(level=0) ; df_st

Unnamed: 0_level_0,Unnamed: 1_level_0,X,Y
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,One,1.1,1.2
0,Two,1.11,1.22
1,One,1.1,1.2
1,Two,1.11,1.22
2,One,1.1,1.2
2,Two,1.11,1.22


In [7]:
df = df_st.reset_index(1) ; df

Unnamed: 0_level_0,level_1,X,Y
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,One,1.1,1.2
0,Two,1.11,1.22
1,One,1.1,1.2
1,Two,1.11,1.22
2,One,1.1,1.2
2,Two,1.11,1.22


In [8]:
df.columns = ['Sample', 'All_x', 'All_Y'] ; df

Unnamed: 0_level_0,Sample,All_x,All_Y
row,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,One,1.1,1.2
0,Two,1.11,1.22
1,One,1.1,1.2
1,Two,1.11,1.22
2,One,1.1,1.2
2,Two,1.11,1.22


## Arithmetic

In [9]:
[(x,y) for x in ['a', 'b', 'c'] for y in ['o', 'i']]

[('a', 'o'), ('a', 'i'), ('b', 'o'), ('b', 'i'), ('c', 'o'), ('c', 'i')]

In [10]:
cols = pd.MultiIndex.from_tuples([(x,y) for x in ['A', 'B', 'C'] for y in ['O', 'I']]); cols

MultiIndex(levels=[['A', 'B', 'C'], ['I', 'O']],
           labels=[[0, 0, 1, 1, 2, 2], [1, 0, 1, 0, 1, 0]])

In [11]:
df = pd.DataFrame(np.random.randn(2, 6), index=['n', 'm'], columns=cols); df

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,O,I,O,I,O,I
n,0.428096,-0.400505,-0.321762,-1.64222,0.823499,0.18064
m,-0.797574,0.170322,0.721776,1.088945,0.59741,-0.437552


In [12]:
df = df.div(df['C'], level=1) ; df

Unnamed: 0_level_0,A,A,B,B,C,C
Unnamed: 0_level_1,O,I,O,I,O,I
n,0.51985,-2.217139,-0.390725,-9.091099,1.0,1.0
m,-1.335055,-0.389262,1.208176,-2.488721,1.0,1.0


## Slicing

In [15]:
coords = [('AA', 'one'), ('AA','six'),('BB','one'),('BB','two'),('BB','six')] ; coords

[('AA', 'one'), ('AA', 'six'), ('BB', 'one'), ('BB', 'two'), ('BB', 'six')]

In [16]:
index = pd.MultiIndex.from_tuples(coords) ; index

MultiIndex(levels=[['AA', 'BB'], ['one', 'six', 'two']],
           labels=[[0, 0, 1, 1, 1], [0, 1, 0, 2, 1]])

In [19]:
df = pd.DataFrame(data = [11, 22, 33, 44, 55], index = index, columns = ['MyData']) ; df

Unnamed: 0,Unnamed: 1,MyData
AA,one,11
AA,six,22
BB,one,33
BB,two,44
BB,six,55


In [23]:
df.xs('BB', level=0, axis=0)

Unnamed: 0,MyData
one,33
two,44
six,55


In [26]:
df.xs('six', level=1, axis=0)

Unnamed: 0,MyData
AA,22
BB,55


In [31]:
import itertools

In [32]:
index = list(itertools.product(['Ada', 'Quinn', 'Violet'], ['Comp', 'Math' 'Sci'])) ; index

[('Ada', 'Comp'),
 ('Ada', 'MathSci'),
 ('Quinn', 'Comp'),
 ('Quinn', 'MathSci'),
 ('Violet', 'Comp'),
 ('Violet', 'MathSci')]

## Sorting

## Levels

## panelnd

---

## pandas.DataFrame.set_index

In [None]:
help(pd.DataFrame.set_index)

## pandas.MultiIndex.from_tuples

In [None]:
help(pd.MultiIndex.from_tuples)

## str.split

In [None]:
help(str.split)

## pandas.DataFrame.stack

In [None]:
help(pd.DataFrame.stack)

## pandas.DataFrame.reset_index

In [None]:
help(pd.DataFrame.reset_index)

##  pandas.DataFrame.div

In [None]:
help(pd.DataFrame.div)

## pandas.DataFrame.xs

In [21]:
help(pd.DataFrame.xs)

Help on function xs in module pandas.core.generic:

xs(self, key, axis=0, level=None, drop_level=True)
    Returns a cross-section (row(s) or column(s)) from the
    Series/DataFrame. Defaults to cross-section on the rows (axis=0).
    
    Parameters
    ----------
    key : object
        Some label contained in the index, or partially in a MultiIndex
    axis : int, default 0
        Axis to retrieve cross-section on
    level : object, defaults to first n levels (n=1 or len(key))
        In case of a key partially contained in a MultiIndex, indicate
        which levels are used. Levels can be referred by label or position.
    drop_level : boolean, default True
        If False, returns object with same levels as self.
    
    Examples
    --------
    >>> df
       A  B  C
    a  4  5  2
    b  4  0  9
    c  9  7  3
    >>> df.xs('a')
    A    4
    B    5
    C    2
    Name: a
    >>> df.xs('C', axis=1)
    a    2
    b    9
    c    3
    Name: C
    
    >>> df
            