# Multi-indexing

From https://pandas.pydata.org/pandas-docs/stable/advanced.html

Let's create a `MultiIndex` from the Cartesian product of two iterables.

In [77]:
import pandas as pd
import numpy as np

# Pair every letter with every integer in range(2): 4 x 2 = 8 index entries.
index = pd.MultiIndex.from_product(
    [
        list('abcd'),
        range(2),
    ]
)

index

MultiIndex(levels=[['a', 'b', 'c', 'd'], [0, 1]],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]])

In [78]:
# Eight unseeded random draws, labelled by the two-level `index`;
# the values differ on every run.
s = pd.Series(
    data=np.random.rand(8),
    index=index,
)

s

a  0    0.496772
   1    0.742495
b  0    0.523331
   1    0.329271
c  0    0.885319
   1    0.697709
d  0    0.392517
   1    0.447753
dtype: float64

In [79]:
# Selecting by an outer-level label alone returns the sub-series
# indexed by the remaining inner level.
s.loc['a']

0    0.496772
1    0.742495
dtype: float64

## With dataframes

In [80]:

# Three index levels: 2 subjects x 5 letters x 5 integers = 50 row labels,
# matched by a 50 x 30 block of unseeded random values.
arr = [
    ["subject_1", "subject_2"],
    list("ABCDE"),
    range(5),
]
index = pd.MultiIndex.from_product(arr)
X = np.random.rand(50, 30)

In [81]:
# Rows carry the three-level `index`; columns keep pandas' default
# integer labels (0..29 for the 30 columns of X).
df = pd.DataFrame(data=X, index=index)

df

Unnamed: 0,Unnamed: 1,Unnamed: 2,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
subject_1,A,0,0.365754,0.257602,0.747467,0.718242,0.415452,0.413966,0.701428,0.437561,0.773046,0.400267,...,0.669998,0.044735,0.842109,0.956228,0.623322,0.639153,0.368671,0.676743,0.709019,0.548143
subject_1,A,1,0.433754,0.285803,0.018311,0.530171,0.134647,0.932722,0.096011,0.045606,0.768717,0.059824,...,0.486018,0.951943,0.351083,0.87278,0.548605,0.113195,0.091471,0.498396,0.04172,0.095716
subject_1,A,2,0.818512,0.709895,0.142232,0.564571,0.513693,0.603292,0.006447,0.584602,0.14163,0.069103,...,0.093621,0.163515,0.124988,0.160704,0.971036,0.672865,0.345021,0.916436,0.670764,0.179311
subject_1,A,3,0.176111,0.522479,0.311108,0.895926,0.715356,0.541824,0.442833,0.727237,0.454609,0.46536,...,0.773235,0.281895,0.723358,0.968839,0.929121,0.32367,0.752641,0.646362,0.297788,0.46356
subject_1,A,4,0.491437,0.430304,0.446894,0.525271,0.67374,0.145629,0.795214,0.356903,0.402093,0.402143,...,0.615165,0.71128,0.327847,0.827091,0.61143,0.422097,0.486812,0.046259,0.621346,0.489448
subject_1,B,0,0.72825,0.581208,0.64366,0.72789,0.715368,0.208319,0.664713,0.427634,0.081441,0.126499,...,0.915481,0.979141,0.305438,0.443109,0.971814,0.307722,0.03219,0.022827,0.771888,0.411618
subject_1,B,1,0.53983,0.72439,0.052664,0.03459,0.257066,0.016995,0.695838,0.96453,0.149745,0.899321,...,0.083615,0.640864,0.015928,0.541706,0.482994,0.583823,0.829953,0.459494,0.84448,0.994251
subject_1,B,2,0.015934,0.554282,0.983718,0.436304,0.935472,0.063589,0.30477,0.35402,0.22698,0.516349,...,0.778322,0.833647,0.077718,0.243944,0.221151,0.435393,0.783759,0.593003,0.763291,0.834652
subject_1,B,3,0.39932,0.901717,0.120443,0.572878,0.304654,0.382071,0.269672,0.22944,0.441445,0.18067,...,0.21844,0.572284,0.947834,0.709324,0.78165,0.408678,0.058825,0.008201,0.623527,0.543142
subject_1,B,4,0.376619,0.851754,0.616293,0.167494,0.322332,0.859227,0.298653,0.056529,0.554525,0.833865,...,0.269505,0.683552,0.871308,0.237167,0.217251,0.891868,0.872959,0.195865,0.465986,0.924169


## Merging


In [83]:

# Second frame with the same three-level layout but different subjects and
# one extra letter: 2 subjects x 6 letters x 5 integers = 60 rows.
# NOTE: this rebinds X, arr and index, which an earlier cell also defines.
arr = [
    ["subject_3", "subject_4"],
    list("ABCDEF"),
    range(5),
]
index = pd.MultiIndex.from_product(arr)
X = np.random.rand(60, 30)

df2 = pd.DataFrame(data=X, index=index)

In [85]:
# Stack the two multi-indexed frames row-wise. `DataFrame.append` was
# deprecated in pandas 1.4 and removed in 2.0; `pd.concat` is the supported
# equivalent and yields the same result (rows of `df` followed by rows of
# `df2`, original index labels preserved).
df3 = pd.concat([df, df2])

# With no path argument, to_csv() returns the CSV text as a string instead of
# writing a file.
df3.to_csv()

Unnamed: 0,Unnamed: 1,Unnamed: 2,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
subject_1,A,0,0.365754,0.257602,0.747467,0.718242,0.415452,0.413966,0.701428,0.437561,0.773046,0.400267,...,0.669998,0.044735,0.842109,0.956228,0.623322,0.639153,0.368671,0.676743,0.709019,0.548143
subject_1,A,1,0.433754,0.285803,0.018311,0.530171,0.134647,0.932722,0.096011,0.045606,0.768717,0.059824,...,0.486018,0.951943,0.351083,0.872780,0.548605,0.113195,0.091471,0.498396,0.041720,0.095716
subject_1,A,2,0.818512,0.709895,0.142232,0.564571,0.513693,0.603292,0.006447,0.584602,0.141630,0.069103,...,0.093621,0.163515,0.124988,0.160704,0.971036,0.672865,0.345021,0.916436,0.670764,0.179311
subject_1,A,3,0.176111,0.522479,0.311108,0.895926,0.715356,0.541824,0.442833,0.727237,0.454609,0.465360,...,0.773235,0.281895,0.723358,0.968839,0.929121,0.323670,0.752641,0.646362,0.297788,0.463560
subject_1,A,4,0.491437,0.430304,0.446894,0.525271,0.673740,0.145629,0.795214,0.356903,0.402093,0.402143,...,0.615165,0.711280,0.327847,0.827091,0.611430,0.422097,0.486812,0.046259,0.621346,0.489448
subject_1,B,0,0.728250,0.581208,0.643660,0.727890,0.715368,0.208319,0.664713,0.427634,0.081441,0.126499,...,0.915481,0.979141,0.305438,0.443109,0.971814,0.307722,0.032190,0.022827,0.771888,0.411618
subject_1,B,1,0.539830,0.724390,0.052664,0.034590,0.257066,0.016995,0.695838,0.964530,0.149745,0.899321,...,0.083615,0.640864,0.015928,0.541706,0.482994,0.583823,0.829953,0.459494,0.844480,0.994251
subject_1,B,2,0.015934,0.554282,0.983718,0.436304,0.935472,0.063589,0.304770,0.354020,0.226980,0.516349,...,0.778322,0.833647,0.077718,0.243944,0.221151,0.435393,0.783759,0.593003,0.763291,0.834652
subject_1,B,3,0.399320,0.901717,0.120443,0.572878,0.304654,0.382071,0.269672,0.229440,0.441445,0.180670,...,0.218440,0.572284,0.947834,0.709324,0.781650,0.408678,0.058825,0.008201,0.623527,0.543142
subject_1,B,4,0.376619,0.851754,0.616293,0.167494,0.322332,0.859227,0.298653,0.056529,0.554525,0.833865,...,0.269505,0.683552,0.871308,0.237167,0.217251,0.891868,0.872959,0.195865,0.465986,0.924169


## Multi-indexing and HDF

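It seems that multi-indexing works OK with HDF: a multi-indexed DataFrame can be written to an `HDFStore` in table format, appended to, and read back with its index levels intact.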

In [71]:

# Open the HDF5 store and write `df` under the key 'example'.
# format='t' selects the table format, which is what permits appending
# more rows to the same key later on.
# NOTE(review): the store is left open for the following cells; call
# hdf.close() once the notebook is done with it.
hdf = pd.HDFStore(path='multi-index.h5')

hdf.put(key='example', value=df, format='t')

In [72]:
# Read the stored frame back from the 'example' key.
hdf.get(key='example')

Unnamed: 0,Unnamed: 1,Unnamed: 2,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
subject_1,A,0,0.207823,0.916843,0.37247,0.396539,0.47444,0.72486,0.858862,0.670795,0.778789,0.010285,...,0.231046,0.891293,0.852641,0.54015,0.434263,0.813703,0.701898,0.015876,0.447548,0.964585
subject_1,A,1,0.463786,0.6324,0.059129,0.498907,0.041536,0.484032,0.499308,0.325908,0.865494,0.613429,...,0.75808,0.616337,0.109862,0.15682,0.667481,0.439687,0.871646,0.240708,0.733048,0.580597
subject_1,A,2,0.765127,0.640595,0.242076,0.765173,0.89165,0.273874,0.223714,0.778486,0.862134,0.715773,...,0.607794,0.649143,0.419684,0.342457,0.817026,0.812522,0.435383,0.176688,0.075498,0.339161
subject_1,A,3,0.09568,0.576557,0.475075,0.994494,0.618002,0.412314,0.94174,0.470414,0.64323,0.298637,...,0.671736,0.642761,0.863304,0.904288,0.936881,0.148901,0.833896,0.697333,0.458743,0.347211
subject_1,A,4,0.226074,0.796186,0.903332,0.312946,0.930211,0.827253,0.558492,0.987897,0.664086,0.670361,...,0.880062,0.78724,0.030715,0.438305,0.727437,0.307584,0.772467,0.989688,0.491692,0.32034
subject_1,B,0,0.544976,0.083489,0.692659,0.861375,0.887937,0.73973,0.71459,0.431992,0.877201,0.152575,...,0.225304,0.300565,0.061403,0.585126,0.284416,0.188656,0.160956,0.318082,0.449165,0.645533
subject_1,B,1,0.71642,0.381529,0.181159,0.506813,0.914409,0.635306,0.351733,0.612552,0.402212,0.424757,...,0.018189,0.737562,0.784725,0.29324,0.404531,0.049005,0.988511,0.928382,0.144863,0.834522
subject_1,B,2,0.199385,0.767969,0.824638,0.66847,0.309265,0.451619,0.303531,0.709193,0.985862,0.366158,...,0.046586,0.100863,0.542113,0.791204,0.635919,0.962847,0.173197,0.991398,0.023713,0.482138
subject_1,B,3,0.947714,0.434508,0.39788,0.814668,0.468556,0.05915,0.809299,0.263397,0.011295,0.620477,...,0.895958,0.230931,0.244233,0.989458,0.45952,0.583741,0.78215,0.709225,0.079607,0.021273
subject_1,B,4,0.236304,0.306879,0.001164,0.723943,0.698881,0.743105,0.00882,0.245047,0.053134,0.890684,...,0.24271,0.250967,0.254253,0.448605,0.917237,0.675641,0.761456,0.47715,0.634377,0.215531


In [73]:
# Append the rows of `df2` to the table already stored under 'example'.
# NOTE: not idempotent; running this cell again appends the same rows again.
hdf.put(key='example', value=df2, append=True, format='t')

In [76]:
# Check that the appended rows are there: select one of df2's subjects
# by its outer-level label.
hdf['example'].loc['subject_3']

Unnamed: 0,Unnamed: 1,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
A,0,0.029606,0.7072,0.542055,0.971152,0.703342,0.86549,0.421489,0.560648,0.410364,0.275535,...,0.594016,0.758589,0.041672,0.052853,0.679443,0.196396,0.028415,0.118473,0.952959,0.262009
A,1,0.161551,0.975367,0.92054,0.020936,0.630926,0.480104,0.384228,0.247375,0.558417,0.305004,...,0.935395,0.897285,0.67836,0.38186,0.364218,0.19158,0.850235,0.504481,0.60219,0.316347
A,2,0.124357,0.873633,0.025189,0.183579,0.526348,0.990631,0.919261,0.470703,0.678327,0.335967,...,0.71446,0.875597,0.409052,0.1649,0.088174,0.359298,0.878654,0.915819,0.256742,0.066638
A,3,0.078059,0.268247,0.969339,0.341003,0.379935,0.036823,0.841245,0.16124,0.039413,0.948624,...,0.080223,0.924471,0.839615,0.751686,0.628923,0.856105,0.987598,0.914002,0.62548,0.993064
A,4,0.159577,0.223066,0.509655,0.31079,0.877294,0.236254,0.057952,0.434536,0.599556,0.387732,...,0.977429,0.970096,0.193041,0.714065,0.652942,0.034229,0.487249,0.550323,0.936898,0.814108
B,0,0.703886,0.080318,0.729994,0.698559,0.076998,0.135357,0.879196,0.532211,0.003586,0.975825,...,0.709716,0.209753,0.97293,0.296825,0.643613,0.482891,0.037324,0.62077,0.172579,0.605008
B,1,0.626524,0.818084,0.464481,0.498366,0.307227,0.689453,0.592125,0.182187,0.735476,0.420345,...,0.985274,0.178314,0.563861,0.560953,0.428647,0.043512,0.418675,0.302405,0.643073,0.946764
B,2,0.002702,0.726291,0.780057,0.624312,0.801507,0.505574,0.257303,0.227188,0.590456,0.541709,...,0.4298,0.918929,0.646874,0.490529,0.531457,0.111662,0.040227,0.76181,0.91764,0.358326
B,3,0.187012,0.125148,0.450714,0.919551,0.982052,0.998237,0.720618,0.271064,0.729186,0.938142,...,0.374268,0.34248,0.615871,0.089078,0.243169,0.027864,0.786013,0.768117,0.882163,0.704338
B,4,0.794044,0.323872,0.006749,0.520589,0.451142,0.403872,0.703801,0.539991,0.9818,0.652932,...,0.334037,0.715806,0.275765,0.520072,0.745372,0.554692,0.762404,0.139289,0.87453,0.626737
