# Multi-indexing

From https://pandas.pydata.org/pandas-docs/stable/advanced.html

Let's create a `MultiIndex` from the Cartesian product of two iterables and use it to index a `Series`.

In [5]:
import pandas as pd
import numpy as np

# Two-level index: every letter 'a'..'d' paired with every integer in range(2),
# i.e. 4 x 2 = 8 (letter, integer) entries.
index = pd.MultiIndex.from_product(iterables=[list('abcd'), range(2)])

index

MultiIndex(levels=[['a', 'b', 'c', 'd'], [0, 1]],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]])

In [9]:
# Eight uniform random values, one per entry of the two-level index.
# No seed is set, so the numbers differ on every run.
s = pd.Series(data=np.random.rand(8), index=index)

s

a  0    0.672277
   1    0.100155
b  0    0.099823
   1    0.233274
c  0    0.487669
   1    0.921285
d  0    0.793593
   1    0.665018
dtype: float64

In [10]:
# Select by the outer-level label only: returns the sub-Series for 'a',
# indexed by the remaining inner level.
s.loc['a']

0    0.672277
1    0.100155
dtype: float64

## With dataframes

In [37]:

# Random data: 50 rows by 30 columns.
X = np.random.rand(50, 30)

# One iterable per index level; their product has 2 x 5 x 5 = 50 entries,
# matching the number of rows in X.
arr = [
    ["subject_1", "subject_2"],
    ["A", "B", "C", "D", "E"],
    range(5),
]
index = pd.MultiIndex.from_product(iterables=arr)

In [38]:
# Wrap the random matrix in a DataFrame whose rows are labelled by the
# three-level index; columns keep the default integer labels.
df = pd.DataFrame(data=X, index=index)

df

Unnamed: 0,Unnamed: 1,Unnamed: 2,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
subject_1,A,0,0.207823,0.916843,0.37247,0.396539,0.47444,0.72486,0.858862,0.670795,0.778789,0.010285,...,0.231046,0.891293,0.852641,0.54015,0.434263,0.813703,0.701898,0.015876,0.447548,0.964585
subject_1,A,1,0.463786,0.6324,0.059129,0.498907,0.041536,0.484032,0.499308,0.325908,0.865494,0.613429,...,0.75808,0.616337,0.109862,0.15682,0.667481,0.439687,0.871646,0.240708,0.733048,0.580597
subject_1,A,2,0.765127,0.640595,0.242076,0.765173,0.89165,0.273874,0.223714,0.778486,0.862134,0.715773,...,0.607794,0.649143,0.419684,0.342457,0.817026,0.812522,0.435383,0.176688,0.075498,0.339161
subject_1,A,3,0.09568,0.576557,0.475075,0.994494,0.618002,0.412314,0.94174,0.470414,0.64323,0.298637,...,0.671736,0.642761,0.863304,0.904288,0.936881,0.148901,0.833896,0.697333,0.458743,0.347211
subject_1,A,4,0.226074,0.796186,0.903332,0.312946,0.930211,0.827253,0.558492,0.987897,0.664086,0.670361,...,0.880062,0.78724,0.030715,0.438305,0.727437,0.307584,0.772467,0.989688,0.491692,0.32034
subject_1,B,0,0.544976,0.083489,0.692659,0.861375,0.887937,0.73973,0.71459,0.431992,0.877201,0.152575,...,0.225304,0.300565,0.061403,0.585126,0.284416,0.188656,0.160956,0.318082,0.449165,0.645533
subject_1,B,1,0.71642,0.381529,0.181159,0.506813,0.914409,0.635306,0.351733,0.612552,0.402212,0.424757,...,0.018189,0.737562,0.784725,0.29324,0.404531,0.049005,0.988511,0.928382,0.144863,0.834522
subject_1,B,2,0.199385,0.767969,0.824638,0.66847,0.309265,0.451619,0.303531,0.709193,0.985862,0.366158,...,0.046586,0.100863,0.542113,0.791204,0.635919,0.962847,0.173197,0.991398,0.023713,0.482138
subject_1,B,3,0.947714,0.434508,0.39788,0.814668,0.468556,0.05915,0.809299,0.263397,0.011295,0.620477,...,0.895958,0.230931,0.244233,0.989458,0.45952,0.583741,0.78215,0.709225,0.079607,0.021273
subject_1,B,4,0.236304,0.306879,0.001164,0.723943,0.698881,0.743105,0.00882,0.245047,0.053134,0.890684,...,0.24271,0.250967,0.254253,0.448605,0.917237,0.675641,0.761456,0.47715,0.634377,0.215531


## Merging


In [52]:

# Second frame with the same 50 x 30 layout but labelled subject_3 / subject_4.
# The data and index are built inline so that the `X`, `arr` and `index`
# names bound when `df` was created are NOT overwritten: re-running the
# `df = pd.DataFrame(X, index=index)` cell after this one must still rebuild
# the subject_1 / subject_2 frame.
df2 = pd.DataFrame(
    np.random.rand(50, 30),
    index=pd.MultiIndex.from_product(
        [["subject_3", "subject_4"], ["A", "B", "C", "D", "E"], range(5)]
    ),
)

In [68]:
# Stack the two frames row-wise. `pd.concat` is used instead of
# `DataFrame.append`, which was deprecated in pandas 1.4 and removed in 2.0;
# for two frames with identical columns the result is the same.
df3 = pd.concat([df, df2])

# The combined index carries the subject labels of both input frames.
df3.index

MultiIndex(levels=[['subject_1', 'subject_2', 'subject_3', 'subject_4'], ['A', 'B', 'C', 'D', 'E'], [0, 1, 2, 3, 4]],
           labels=[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3], [0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4], [0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 

## Multi-indexing and HDF

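Multi-indexed DataFrames appear to round-trip through an HDF5 store without problems: the frame read back below still carries its three index levels.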

In [71]:

# Open the HDF5 store backed by the file 'multi-index.h5'.
# The handle stays open because later cells read from and write to it.
hdf = pd.HDFStore(path='multi-index.h5')

# Store the multi-indexed frame under the key 'example'; format='t' selects
# the PyTables "table" layout.
hdf.put(key='example', value=df, format='t')

In [72]:
# Read the stored frame back to check that the MultiIndex survived the round trip.
hdf.get(key='example')

Unnamed: 0,Unnamed: 1,Unnamed: 2,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
subject_1,A,0,0.207823,0.916843,0.37247,0.396539,0.47444,0.72486,0.858862,0.670795,0.778789,0.010285,...,0.231046,0.891293,0.852641,0.54015,0.434263,0.813703,0.701898,0.015876,0.447548,0.964585
subject_1,A,1,0.463786,0.6324,0.059129,0.498907,0.041536,0.484032,0.499308,0.325908,0.865494,0.613429,...,0.75808,0.616337,0.109862,0.15682,0.667481,0.439687,0.871646,0.240708,0.733048,0.580597
subject_1,A,2,0.765127,0.640595,0.242076,0.765173,0.89165,0.273874,0.223714,0.778486,0.862134,0.715773,...,0.607794,0.649143,0.419684,0.342457,0.817026,0.812522,0.435383,0.176688,0.075498,0.339161
subject_1,A,3,0.09568,0.576557,0.475075,0.994494,0.618002,0.412314,0.94174,0.470414,0.64323,0.298637,...,0.671736,0.642761,0.863304,0.904288,0.936881,0.148901,0.833896,0.697333,0.458743,0.347211
subject_1,A,4,0.226074,0.796186,0.903332,0.312946,0.930211,0.827253,0.558492,0.987897,0.664086,0.670361,...,0.880062,0.78724,0.030715,0.438305,0.727437,0.307584,0.772467,0.989688,0.491692,0.32034
subject_1,B,0,0.544976,0.083489,0.692659,0.861375,0.887937,0.73973,0.71459,0.431992,0.877201,0.152575,...,0.225304,0.300565,0.061403,0.585126,0.284416,0.188656,0.160956,0.318082,0.449165,0.645533
subject_1,B,1,0.71642,0.381529,0.181159,0.506813,0.914409,0.635306,0.351733,0.612552,0.402212,0.424757,...,0.018189,0.737562,0.784725,0.29324,0.404531,0.049005,0.988511,0.928382,0.144863,0.834522
subject_1,B,2,0.199385,0.767969,0.824638,0.66847,0.309265,0.451619,0.303531,0.709193,0.985862,0.366158,...,0.046586,0.100863,0.542113,0.791204,0.635919,0.962847,0.173197,0.991398,0.023713,0.482138
subject_1,B,3,0.947714,0.434508,0.39788,0.814668,0.468556,0.05915,0.809299,0.263397,0.011295,0.620477,...,0.895958,0.230931,0.244233,0.989458,0.45952,0.583741,0.78215,0.709225,0.079607,0.021273
subject_1,B,4,0.236304,0.306879,0.001164,0.723943,0.698881,0.743105,0.00882,0.245047,0.053134,0.890684,...,0.24271,0.250967,0.254253,0.448605,0.917237,0.675641,0.761456,0.47715,0.634377,0.215531


In [70]:
# Write the second frame under the same 'example' key, again in table format.
# NOTE(review): put() is not an append, so this presumably replaces whatever
# was stored under 'example' before; confirm that is the intent.
hdf.put(key='example', value=df2, format='t')