In [1]:
import numpy as np
import pandas

# Hierarchical Indexing

In [9]:
# Series carrying a two-level (hierarchical) index: letters on the outer
# level, integers on the inner level.
data = pandas.Series(
    np.arange(9),
    index=[
        list('aaabbccdd'),
        [1, 2, 3, 1, 3, 1, 2, 2, 3],
    ],
)

# Display list layout: every result is followed by an empty spacer string and
# then the text label that announces the next result.
o = []
o.extend([data, '', 'data.index'])
o.extend([data.index, '', 'data["b"]'])
# The multi-index can be queried by outer-level label, slice, or list.
o.extend([data['b'], '', 'data["b":"c"]'])
o.extend([data['b':'c'], '', 'data.loc[["b","d"]]'])
o.extend([data.loc[['b', 'd']], '', 'data.loc[:, 2]'])
# Selection on the inner level across every outer label.
o.extend([data.loc[:, 2], '', 'data.unstack'])
# unstack() moves the inner index level into the columns.
o.extend([data.unstack(), '', ''])
o

[a  1    0
    2    1
    3    2
 b  1    3
    3    4
 c  1    5
    2    6
 d  2    7
    3    8
 dtype: int64,
 '',
 'data.index',
 MultiIndex([('a', 1),
             ('a', 2),
             ('a', 3),
             ('b', 1),
             ('b', 3),
             ('c', 1),
             ('c', 2),
             ('d', 2),
             ('d', 3)],
            ),
 '',
 'data["b"]',
 1    3
 3    4
 dtype: int64,
 '',
 'data["b":"c"]',
 b  1    3
    3    4
 c  1    5
    2    6
 dtype: int64,
 '',
 'data.loc[["b","d"]]',
 b  1    3
    3    4
 d  2    7
    3    8
 dtype: int64,
 '',
 'data.loc[:, 2]',
 a    1
 c    6
 d    7
 dtype: int64,
 '',
 'data.unstack',
      1    2    3
 a  0.0  1.0  2.0
 b  3.0  NaN  4.0
 c  5.0  6.0  NaN
 d  NaN  7.0  8.0,
 '',
 '']

In [15]:
# DataFrame with hierarchical labels on both axes: two row levels and two
# column levels.
frame = pandas.DataFrame(
    np.arange(12).reshape(4, 3),
    # Outer row level could equally be labelled ['a', 'a', 'b', 'b'].
    index=[
        [1, 1, 2, 2],
        [1, 2, 1, 2],
    ],
    columns=[
        ['Ohio', 'Ohio', 'Colorado'],
        ['Green', 'Red', 'Green'],
    ],
)
# Name the levels so they can be referred to by name later on.
frame.index.names = ['phase', 'group']
frame.columns.names = ['state', 'color']

# Result, spacer, label-of-next-result triples.
o = []
o.extend([frame, '', 'frame["Ohio"]'])
# Selecting an outer column label returns the sub-frame beneath it.
o.extend([frame['Ohio'], '', ''])
o

[state        Ohio     Colorado
 color       Green Red    Green
 phase group                   
 1     1         0   1        2
       2         3   4        5
 2     1         6   7        8
       2         9  10       11,
 '',
 'frame["Ohio"]',
 color        Green  Red
 phase group            
 1     1          0    1
       2          3    4
 2     1          6    7
       2          9   10]

In [14]:
# Bare expression: the notebook renders `frame` as this cell's output.
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
phase,group,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,1,0,1,2
1,2,3,4,5
2,1,6,7,8
2,2,9,10,11


## Reordering and Sorting Levels

In [19]:
# Methods to remember for reordering and sorting index levels:
#   .swaplevel()   - exchange two index levels; the row data keeps its order
#   .sort_index()  - sort the rows by the values of an index level
#
# Relies on `frame` (rows indexed by 'phase'/'group') built in an earlier cell.

# Result, spacer, label-of-next-result triples.
o = [
    frame, '', "frame.swaplevel('group', 'phase')",
    frame.swaplevel('group', 'phase'), '', 'frame.sort_index(level=1)',
    # level=1 is the second row level ('group' on the unswapped frame).
    frame.sort_index(level=1), '', "frame.swaplevel('phase','group').sort_index(level=0)",
    # After the swap 'group' is the outermost level, so level=0 sorts by group.
    frame.swaplevel('phase', 'group').sort_index(level=0),
]
o

[state        Ohio     Colorado
 color       Green Red    Green
 phase group                   
 1     1         0   1        2
       2         3   4        5
 2     1         6   7        8
       2         9  10       11,
 '',
 "frame.swaplevel('group', 'phase')",
 state        Ohio     Colorado
 color       Green Red    Green
 group phase                   
 1     1         0   1        2
 2     1         3   4        5
 1     2         6   7        8
 2     2         9  10       11,
 '',
 'frame.sort_index(level=1)',
 state        Ohio     Colorado
 color       Green Red    Green
 phase group                   
 1     1         0   1        2
 2     1         6   7        8
 1     2         3   4        5
 2     2         9  10       11,
 '',
 "frame.swaplevel('phase','group').sort_index(level=0)",
 state        Ohio     Colorado
 color       Green Red    Green
 group phase                   
 1     1         0   1        2
       2         6   7        8
 2     1         3   4   

## Summary Statistics by Level

In [None]:
# code

## Indexing with a DataFrame's Columns

In [None]:
# code

# Combining and Merging Datasets
## Database-Style DataFrame Joins

In [None]:
# df1/df2 share a column called 'key'; df3/df4 hold the same kind of data but
# name their key columns differently ('lkey' and 'rkey').
df1 = pandas.DataFrame({
    'key': list('bbacaab'),
    'data1': range(7),
})
df2 = pandas.DataFrame({
    'key': list('abd'),
    'data2': np.arange(3) + 100,
})
df3 = pandas.DataFrame({
    'lkey': list('bbacaab'),
    'data1': range(7),
})
df4 = pandas.DataFrame({
    'rkey': list('abd'),
    'data2': range(3),
})

# Result, spacer, label-of-next-result triples (the inputs carry empty labels).
o = ['', '']
o += [df1, '', '']
o += [df2, '', 'pandas.merge(df1, df2, on="key")']
o += [
    pandas.merge(df1, df2, on='key'),
    '',
    'pandas.merge(df3, df4, left_on="lkey", right_on="rkey")',
]
# Differently named key columns are paired up with left_on / right_on.
o += [
    pandas.merge(df3, df4, left_on='lkey', right_on='rkey'),
    '',
    '.merge(..., how="outer")',
]
# No `on` given: merge falls back to the column names both frames share.
o += [pandas.merge(df1, df2, how='outer')]
o

In [None]:
# Many-to-many joins form the Cartesian product of the matching rows.
# Both df1 and df2 repeat the 'a' and 'b' keys, so each match multiplies out.
df1 = pandas.DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'b'], 'data1': range(6)})
df2 = pandas.DataFrame({'key': ['a', 'b', 'a', 'b', 'd'], 'data2': range(5)})

# left/right are joined on the pair of columns (key1, key2).
left = pandas.DataFrame({'key1': ['foo', 'foo', 'bar'],
                         'key2': ['one', 'two', 'one'],
                         'lval': [1, 2, 3]})
right = pandas.DataFrame({'key1': ['foo', 'foo', 'bar', 'bar'],
                          'key2': ['one', 'one', 'one', 'two'],
                          'rval': [4, 5, 6, 7]})

# left1 keeps its join key in a column; right1 keeps it in the index.
left1 = pandas.DataFrame({'key': ['a', 'b', 'a', 'a', 'b', 'c'], 'value': range(6)})
right1 = pandas.DataFrame({'group_val': [10.5, 20.5]}, index=['a', 'b'])

# Result, spacer, label-of-next-result triples. Each label spells out the
# call that produces the entry following it.
o = [
    '', 'pandas.merge(df1, df2, on="key", how="left")',
    pandas.merge(df1, df2, on='key', how='left'), '', 'pandas.merge(df1, df2, on="key", how="inner")',
    pandas.merge(df1, df2, on='key', how='inner'), '', 'left',
    left, '', 'right',
    right, '', 'pandas.merge(left, right, on=["key1","key2"], how="outer")',
    pandas.merge(left, right, on=['key1', 'key2'], how='outer'), '', 'pandas.merge(left, right, on="key1", suffixes=("_left","_right"))',
    # Joining on key1 alone leaves two 'key2' columns; suffixes tell them apart.
    pandas.merge(left, right, on='key1', suffixes=('_left', '_right')), '', 'left1',
    left1, '', 'right1',
    right1, '', 'pandas.merge(left1, right1, left_on="key", right_index=True)',
    # Match left1's 'key' column against right1's index.
    pandas.merge(left1, right1, left_on='key', right_index=True), '', '.merge(L, R, left_on="key", right_index=True, how="outer")',
    pandas.merge(left1, right1, left_on='key', right_index=True, how='outer'),
]
o

## Merging on Index

In [None]:
# lefth keeps its two join keys in ordinary columns; righth keeps the matching
# keys in a two-level row index.
lefth = pandas.DataFrame({
    'key1': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'key2': [2000, 2001, 2002, 2001, 2002],
    'data': np.arange(5.),
})
righth = pandas.DataFrame(
    np.arange(12.).reshape((6, 2)),
    index=[
        ['Nevada', 'Nevada', 'Ohio', 'Ohio', 'Ohio', 'Ohio'],
        [2001, 2000, 2000, 2000, 2001, 2002],
    ],
    columns=['event1', 'event2'],
)

# left2/right2 are both keyed by a plain single-level index.
left2 = pandas.DataFrame(
    [[1., 2.], [3., 4.], [5., 6.]],
    index=['a', 'c', 'e'],
    columns=['Ohio', 'Nevada'],
)
right2 = pandas.DataFrame(
    [[7., 8.], [9., 10.], [11., 12.], [13, 14]],
    index=['b', 'c', 'd', 'e'],
    columns=['Missouri', 'Alabama'],
)

# Result, spacer, label-of-next-result triples.
o = ['', 'lefth']
o += [lefth, '', 'righth']
o += [righth, '', '.merge(L, R, left_on=["key1","key2"], right_index=True, how="outer")']
# Two left-hand columns are matched against the two levels of righth's index.
o += [
    pandas.merge(lefth, righth, left_on=['key1', 'key2'], right_index=True, how='outer'),
    '',
    'left2',
]
o += [left2, '', 'right2']
o += [right2, '', '.merge(L, R, how="outer", left_index=True, right_index=True)']
# Both sides contribute their index as the join key.
o += [
    pandas.merge(left2, right2, how='outer', left_index=True, right_index=True),
    '',
    '',
]
o

## Concatenating Along an Axis

In [None]:
# code

## Combining Data with Overlap

In [None]:
# code

# Reshaping and Pivoting

In [None]:
# code

## Reshaping with Hierarchical Indexing

In [None]:
# code

## Pivoting "Long" to "Wide" Format

In [None]:
# code

## Pivoting "Wide" to "Long" Format

In [None]:
# code
