In [1]:
import numpy as np
import pandas as pd

In [2]:
data = pd.Series(np.random.randn(9),
                 index=[['a','a','a','b','b','c','c','d','d'],
                        [1,2,3,1,3,1,2,2,3]])

In [3]:
data

a  1   -1.039821
   2    0.542046
   3   -1.453683
b  1    0.556923
   3   -1.721550
c  1    0.178116
   2    0.516155
d  2   -0.147345
   3    0.682790
dtype: float64

In [4]:
data.index.levels

FrozenList([['a', 'b', 'c', 'd'], [1, 2, 3]])

In [5]:
data[['b','d']]

b  1    0.556923
   3   -1.721550
d  2   -0.147345
   3    0.682790
dtype: float64

In [6]:
data.loc[:,2]

a    0.542046
c    0.516155
d   -0.147345
dtype: float64

In [7]:
data.unstack()

Unnamed: 0,1,2,3
a,-1.039821,0.542046,-1.453683
b,0.556923,,-1.72155
c,0.178116,0.516155,
d,,-0.147345,0.68279


In [8]:
data.unstack().stack()

a  1   -1.039821
   2    0.542046
   3   -1.453683
b  1    0.556923
   3   -1.721550
c  1    0.178116
   2    0.516155
d  2   -0.147345
   3    0.682790
dtype: float64

In [9]:
frame = pd.DataFrame(np.arange(12).reshape((4,3)),
                     index=[['a','a','b','b'],[1,2,1,2]],
                     columns=[['Ohio','Ohio','Colorado'],['Green','Red','Green']])

In [10]:
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [11]:
frame.index.names = ['key1','key2']

In [12]:
frame.columns.names = ['state','color']

In [13]:
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [14]:
frame.unstack([0,1])

state     color  key1  key2
Ohio      Green  a     1        0
                       2        3
                 b     1        6
                       2        9
          Red    a     1        1
                       2        4
                 b     1        7
                       2       10
Colorado  Green  a     1        2
                       2        5
                 b     1        8
                       2       11
dtype: int32

In [15]:
frame.Ohio

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [16]:
pd.MultiIndex.from_arrays([['Ohio','Ohio','Colorado'],['Green','Red','Green']],
                          names=['states','color'])

MultiIndex([(    'Ohio', 'Green'),
            (    'Ohio',   'Red'),
            ('Colorado', 'Green')],
           names=['states', 'color'])

In [17]:
data.unstack()

Unnamed: 0,1,2,3
a,-1.039821,0.542046,-1.453683
b,0.556923,,-1.72155
c,0.178116,0.516155,
d,,-0.147345,0.68279


In [18]:
newdata = pd.DataFrame(np.random.randn(8),
                       index=[['one','one','one','one','two','two','two','two'],
                              ['a','a','b','b']*2,[1,2]*4])

In [19]:
newdata.unstack(2,)

Unnamed: 0_level_0,Unnamed: 1_level_0,0,0
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2
one,a,0.648126,-1.218092
one,b,1.1965,1.378525
two,a,0.822881,1.35032
two,b,2.765877,-1.213046


In [20]:
newdf = pd.DataFrame(np.random.randn(5,5),
                     index=[['a','a','a','b','b'],
                            [1,2,3,1,2]],
                     columns=[['one','one','two','two','three'],
                              ['x','y','x','y','x']])

In [21]:
newdf

Unnamed: 0_level_0,Unnamed: 1_level_0,one,one,two,two,three
Unnamed: 0_level_1,Unnamed: 1_level_1,x,y,x,y,x
a,1,-0.500822,0.498983,0.078916,-0.61618,-0.976607
a,2,-0.754857,0.418009,-1.795989,-0.317034,-1.238459
a,3,-0.750654,-0.519201,-0.222302,-0.34835,0.325582
b,1,-0.652289,-0.17376,0.066563,-0.727387,-0.117952
b,2,0.97306,0.904312,0.416623,-0.917301,0.635753


In [22]:
newdf.stack([1,0],dropna=True)

a  1  x  one     -0.500822
         three   -0.976607
         two      0.078916
      y  one      0.498983
         two     -0.616180
   2  x  one     -0.754857
         three   -1.238459
         two     -1.795989
      y  one      0.418009
         two     -0.317034
   3  x  one     -0.750654
         three    0.325582
         two     -0.222302
      y  one     -0.519201
         two     -0.348350
b  1  x  one     -0.652289
         three   -0.117952
         two      0.066563
      y  one     -0.173760
         two     -0.727387
   2  x  one      0.973060
         three    0.635753
         two      0.416623
      y  one      0.904312
         two     -0.917301
dtype: float64

In [23]:
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [24]:
frame['Ohio','Green']

key1  key2
a     1       0
      2       3
b     1       6
      2       9
Name: (Ohio, Green), dtype: int32

In [25]:
frame.iloc[1]

state     color
Ohio      Green    3
          Red      4
Colorado  Green    5
Name: (a, 2), dtype: int32

In [26]:
frame['Ohio']

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [27]:
iterable = [['a','b','c','d'],['one','two'],[1,2,3]]

In [28]:
pd.MultiIndex.from_product(iterable)

MultiIndex([('a', 'one', 1),
            ('a', 'one', 2),
            ('a', 'one', 3),
            ('a', 'two', 1),
            ('a', 'two', 2),
            ('a', 'two', 3),
            ('b', 'one', 1),
            ('b', 'one', 2),
            ('b', 'one', 3),
            ('b', 'two', 1),
            ('b', 'two', 2),
            ('b', 'two', 3),
            ('c', 'one', 1),
            ('c', 'one', 2),
            ('c', 'one', 3),
            ('c', 'two', 1),
            ('c', 'two', 2),
            ('c', 'two', 3),
            ('d', 'one', 1),
            ('d', 'one', 2),
            ('d', 'one', 3),
            ('d', 'two', 1),
            ('d', 'two', 2),
            ('d', 'two', 3)],
           )

In [29]:
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [30]:
frame.swaplevel('key1','key2')

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [31]:
frame.swaplevel('key1','key2')

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
2,a,3,4,5
1,b,6,7,8
2,b,9,10,11


In [32]:
frame.sort_index(level=1)

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
b,1,6,7,8
a,2,3,4,5
b,2,9,10,11


In [33]:
frame.swaplevel(0,1,axis=1,).sort_index(level=1,axis=1,)

Unnamed: 0_level_0,color,Green,Green,Red
Unnamed: 0_level_1,state,Colorado,Ohio,Ohio
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,2,0,1
a,2,5,3,4
b,1,8,6,7
b,2,11,9,10


In [34]:
frame = pd.DataFrame({'a':range(7),'b':range(7,0,-1),
                      'c':['one','one','two','two','three','three','three'],
                      'd':[0,1,2,0,1,2,3]})

In [35]:
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,two,2
3,3,4,two,0
4,4,3,three,1
5,5,2,three,2
6,6,1,three,3


In [36]:
frame2 = frame.set_index(['c','d'],verify_integrity=True,)

In [37]:
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
two,2,2,5
two,0,3,4
three,1,4,3
three,2,5,2
three,3,6,1


In [38]:
frame2.reset_index([0,1])

Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,two,2,2,5
3,two,0,3,4
4,three,1,4,3
5,three,2,5,2
6,three,3,6,1


In [39]:
myframe = pd.DataFrame(np.random.randn(6,6),
                       index=[['a','a','a','b','b','b'],[1,1,2]*2,['one','two','one']*2],
                       columns=[['一','一','一','二','二','二'],['q','q','w','q','q','w'],['p','o','p']*2])

In [40]:
myframe.index.names = ['a-z','1-9','one-nine']

In [41]:
myframe.columns.names = ['一 ~ 九','q-p','p-q']

In [42]:
myframe

Unnamed: 0_level_0,Unnamed: 1_level_0,一 ~ 九,一,一,一,二,二,二
Unnamed: 0_level_1,Unnamed: 1_level_1,q-p,q,q,w,q,q,w
Unnamed: 0_level_2,Unnamed: 1_level_2,p-q,p,o,p,p,o,p
a-z,1-9,one-nine,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
a,1,one,-0.729872,-0.473511,2.059662,-0.246659,-1.19762,0.121984
a,1,two,-0.667986,-0.076551,-0.052711,-0.878329,0.573607,0.763896
a,2,one,-0.213781,-1.447093,-0.074662,-0.094256,-0.204955,1.468762
b,1,one,-1.187352,0.59455,0.792184,0.774334,-0.190393,-0.640379
b,1,two,0.006615,0.928664,-0.471846,0.306349,1.518703,-0.747103
b,2,one,-0.451008,-1.213096,-2.301961,-1.819668,1.780578,-1.522783


In [43]:
myframe.reset_index(['1-9','a-z'])

一 ~ 九,a-z,1-9,一,一,一,二,二,二
q-p,Unnamed: 1_level_1,Unnamed: 2_level_1,q,q,w,q,q,w
p-q,Unnamed: 1_level_2,Unnamed: 2_level_2,p,o,p,p,o,p
one-nine,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
one,a,1,-0.729872,-0.473511,2.059662,-0.246659,-1.19762,0.121984
two,a,1,-0.667986,-0.076551,-0.052711,-0.878329,0.573607,0.763896
one,a,2,-0.213781,-1.447093,-0.074662,-0.094256,-0.204955,1.468762
one,b,1,-1.187352,0.59455,0.792184,0.774334,-0.190393,-0.640379
two,b,1,0.006615,0.928664,-0.471846,0.306349,1.518703,-0.747103
one,b,2,-0.451008,-1.213096,-2.301961,-1.819668,1.780578,-1.522783


In [44]:
myframe.reset_index(drop=False)

一 ~ 九,a-z,1-9,one-nine,一,一,一,二,二,二
q-p,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,q,q,w,q,q,w
p-q,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,p,o,p,p,o,p
0,a,1,one,-0.729872,-0.473511,2.059662,-0.246659,-1.19762,0.121984
1,a,1,two,-0.667986,-0.076551,-0.052711,-0.878329,0.573607,0.763896
2,a,2,one,-0.213781,-1.447093,-0.074662,-0.094256,-0.204955,1.468762
3,b,1,one,-1.187352,0.59455,0.792184,0.774334,-0.190393,-0.640379
4,b,1,two,0.006615,0.928664,-0.471846,0.306349,1.518703,-0.747103
5,b,2,one,-0.451008,-1.213096,-2.301961,-1.819668,1.780578,-1.522783


In [45]:
myframe.reset_index(['1-9','a-z'])

一 ~ 九,a-z,1-9,一,一,一,二,二,二
q-p,Unnamed: 1_level_1,Unnamed: 2_level_1,q,q,w,q,q,w
p-q,Unnamed: 1_level_2,Unnamed: 2_level_2,p,o,p,p,o,p
one-nine,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
one,a,1,-0.729872,-0.473511,2.059662,-0.246659,-1.19762,0.121984
two,a,1,-0.667986,-0.076551,-0.052711,-0.878329,0.573607,0.763896
one,a,2,-0.213781,-1.447093,-0.074662,-0.094256,-0.204955,1.468762
one,b,1,-1.187352,0.59455,0.792184,0.774334,-0.190393,-0.640379
two,b,1,0.006615,0.928664,-0.471846,0.306349,1.518703,-0.747103
one,b,2,-0.451008,-1.213096,-2.301961,-1.819668,1.780578,-1.522783


In [46]:
myframe.set_index(['一','一','一','二'])

一,一.1,一.2,二
"(q, p)","(q, p)","(q, p)","(q, p)"
"(q, o)","(q, o)","(q, o)","(q, o)"
"(w, p)","(w, p)","(w, p)","(w, p)"


In [47]:
myframe.reset_index(['1-9'],col_level=1,col_fill=(1,2))

Unnamed: 0_level_0,一 ~ 九,"(1, 2)",一,一,一,二,二,二
Unnamed: 0_level_1,q-p,1-9,q,q,w,q,q,w
Unnamed: 0_level_2,p-q,"(1, 2)",p,o,p,p,o,p
a-z,one-nine,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
a,one,1,-0.729872,-0.473511,2.059662,-0.246659,-1.19762,0.121984
a,two,1,-0.667986,-0.076551,-0.052711,-0.878329,0.573607,0.763896
a,one,2,-0.213781,-1.447093,-0.074662,-0.094256,-0.204955,1.468762
b,one,1,-1.187352,0.59455,0.792184,0.774334,-0.190393,-0.640379
b,two,1,0.006615,0.928664,-0.471846,0.306349,1.518703,-0.747103
b,one,2,-0.451008,-1.213096,-2.301961,-1.819668,1.780578,-1.522783


In [48]:
myframe['二']

Unnamed: 0_level_0,Unnamed: 1_level_0,q-p,q,q,w
Unnamed: 0_level_1,Unnamed: 1_level_1,p-q,p,o,p
a-z,1-9,one-nine,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
a,1,one,-0.246659,-1.19762,0.121984
a,1,two,-0.878329,0.573607,0.763896
a,2,one,-0.094256,-0.204955,1.468762
b,1,one,0.774334,-0.190393,-0.640379
b,1,two,0.306349,1.518703,-0.747103
b,2,one,-1.819668,1.780578,-1.522783


In [49]:
myframe

Unnamed: 0_level_0,Unnamed: 1_level_0,一 ~ 九,一,一,一,二,二,二
Unnamed: 0_level_1,Unnamed: 1_level_1,q-p,q,q,w,q,q,w
Unnamed: 0_level_2,Unnamed: 1_level_2,p-q,p,o,p,p,o,p
a-z,1-9,one-nine,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
a,1,one,-0.729872,-0.473511,2.059662,-0.246659,-1.19762,0.121984
a,1,two,-0.667986,-0.076551,-0.052711,-0.878329,0.573607,0.763896
a,2,one,-0.213781,-1.447093,-0.074662,-0.094256,-0.204955,1.468762
b,1,one,-1.187352,0.59455,0.792184,0.774334,-0.190393,-0.640379
b,1,two,0.006615,0.928664,-0.471846,0.306349,1.518703,-0.747103
b,2,one,-0.451008,-1.213096,-2.301961,-1.819668,1.780578,-1.522783


In [50]:
myframe.reset_index(['a-z'])

Unnamed: 0_level_0,一 ~ 九,a-z,一,一,一,二,二,二
Unnamed: 0_level_1,q-p,Unnamed: 2_level_1,q,q,w,q,q,w
Unnamed: 0_level_2,p-q,Unnamed: 2_level_2,p,o,p,p,o,p
1-9,one-nine,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
1,one,a,-0.729872,-0.473511,2.059662,-0.246659,-1.19762,0.121984
1,two,a,-0.667986,-0.076551,-0.052711,-0.878329,0.573607,0.763896
2,one,a,-0.213781,-1.447093,-0.074662,-0.094256,-0.204955,1.468762
1,one,b,-1.187352,0.59455,0.792184,0.774334,-0.190393,-0.640379
1,two,b,0.006615,0.928664,-0.471846,0.306349,1.518703,-0.747103
2,one,b,-0.451008,-1.213096,-2.301961,-1.819668,1.780578,-1.522783


In [51]:
df1 = pd.DataFrame({'key':['b','b','a','c','a','a','b'],
                    'data1': range(7)})

In [52]:
df2 = pd.DataFrame({'key':['a','b','d'],
                    'data2':range(3)})

In [53]:
df1

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


In [54]:
df2

Unnamed: 0,key,data2
0,a,0
1,b,1
2,d,2


In [55]:
pd.merge(df1,df2,on='key')

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [56]:
df1 = pd.DataFrame({'key':['b','b','a','c','a','a','b'],
                    'data1': range(7)})

In [57]:
df1

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


In [58]:
df2 = pd.DataFrame({'key':['a','b','d'],
                    'data2':range(3)})

In [59]:
df2

Unnamed: 0,key,data2
0,a,0
1,b,1
2,d,2


In [60]:
df1.merge(df2)

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [61]:
df2.merge(df1)

Unnamed: 0,key,data2,data1
0,a,0,2
1,a,0,4
2,a,0,5
3,b,1,0
4,b,1,1
5,b,1,6


In [62]:
pd.merge(df1,df2)

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [63]:
pd.merge(df2,df1)

Unnamed: 0,key,data2,data1
0,a,0,2
1,a,0,4
2,a,0,5
3,b,1,0
4,b,1,1
5,b,1,6


In [64]:
pd.merge(df1,df2,on='key')

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [65]:
df3 = pd.DataFrame({'lkey':['b','b','a','c','a','a','b'],
                    'data1':range(7)})

In [66]:
df4 = pd.DataFrame({'rkey':['a','b','d'],
                    'data2':range(3)})

In [67]:
pd.merge(df3,df4,left_on='lkey',right_on='rkey')

Unnamed: 0,lkey,data1,rkey,data2
0,b,0,b,1
1,b,1,b,1
2,b,6,b,1
3,a,2,a,0
4,a,4,a,0
5,a,5,a,0


In [68]:
pd.merge(df1,df2,how='outer')

Unnamed: 0,key,data1,data2
0,b,0.0,1.0
1,b,1.0,1.0
2,b,6.0,1.0
3,a,2.0,0.0
4,a,4.0,0.0
5,a,5.0,0.0
6,c,3.0,
7,d,,2.0


In [69]:
pd.merge(df1,df2,how='cross')

Unnamed: 0,key_x,data1,key_y,data2
0,b,0,a,0
1,b,0,b,1
2,b,0,d,2
3,b,1,a,0
4,b,1,b,1
5,b,1,d,2
6,a,2,a,0
7,a,2,b,1
8,a,2,d,2
9,c,3,a,0


In [70]:
df1 = pd.DataFrame({'key':['b','b','a','c','a','b'],
                    'data1':range(6)})

In [71]:
df2 = pd.DataFrame({'key':['a','b','a','b','d'],
                    'data2':range(5)})

In [72]:
df1

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,b,5


In [73]:
df2

Unnamed: 0,key,data2
0,a,0
1,b,1
2,a,2
3,b,3
4,d,4


In [74]:
pd.merge(df1,df2,on='key',how='left')

Unnamed: 0,key,data1,data2
0,b,0,1.0
1,b,0,3.0
2,b,1,1.0
3,b,1,3.0
4,a,2,0.0
5,a,2,2.0
6,c,3,
7,a,4,0.0
8,a,4,2.0
9,b,5,1.0


In [75]:
pd.merge(df1,df2,how='inner')

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,0,3
2,b,1,1
3,b,1,3
4,b,5,1
5,b,5,3
6,a,2,0
7,a,2,2
8,a,4,0
9,a,4,2


In [76]:
left = pd.DataFrame({'key1':['foo','foo','bar'],
                     'key2':['one','two','one'],
                     'lval':[1,2,3]})

In [77]:
right = pd.DataFrame({'key1':['foo','foo','bar','bar'],
                      'key2':['one','one','o1ne','two'],
                      'rval':[5,5,6,7]})

In [78]:
left.merge(right,on=['key1','key2'],how='outer')

Unnamed: 0,key1,key2,lval,rval
0,foo,one,1.0,5.0
1,foo,one,1.0,5.0
2,foo,two,2.0,
3,bar,one,3.0,
4,bar,o1ne,,6.0
5,bar,two,,7.0


In [79]:
import pandas as pd

In [80]:
df1  = pd.DataFrame({'key':['b','b','a','c','a','a','b'],
                     'label':['r','t','y','u','i','o','p'],
                     'data1':range(7)})

In [81]:
df2 = pd.DataFrame({'key':['a','b','d'],
                    'label':['a','t','y'],
                    'data2':range(3)})

In [82]:
df1

Unnamed: 0,key,label,data1
0,b,r,0
1,b,t,1
2,a,y,2
3,c,u,3
4,a,i,4
5,a,o,5
6,b,p,6


In [83]:
df2

Unnamed: 0,key,label,data2
0,a,a,0
1,b,t,1
2,d,y,2


In [84]:
pd.merge(df1,df2,how='inner',indicator=True,on='key',validate='m:m')

Unnamed: 0,key,label_x,data1,label_y,data2,_merge
0,b,r,0,t,1,both
1,b,t,1,t,1,both
2,b,p,6,t,1,both
3,a,y,2,a,0,both
4,a,i,4,a,0,both
5,a,o,5,a,0,both


In [85]:
pd.merge(df1,df2,left_on='key',right_on='label',how='outer')

Unnamed: 0,key_x,label_x,data1,key_y,label_y,data2
0,b,r,0.0,,,
1,b,t,1.0,,,
2,b,p,6.0,,,
3,a,y,2.0,a,a,0.0
4,a,i,4.0,a,a,0.0
5,a,o,5.0,a,a,0.0
6,c,u,3.0,,,
7,,,,b,t,1.0
8,,,,d,y,2.0


In [86]:
pd.merge(df1,df2,left_index=True,right_index=True,how='outer')

Unnamed: 0,key_x,label_x,data1,key_y,label_y,data2
0,b,r,0,a,a,0.0
1,b,t,1,b,t,1.0
2,a,y,2,d,y,2.0
3,c,u,3,,,
4,a,i,4,,,
5,a,o,5,,,
6,b,p,6,,,


In [87]:
df = pd.DataFrame({'Let':['A','B','C'],
                   'Num':[1,2,3,]})

In [88]:
df

Unnamed: 0,Let,Num
0,A,1
1,B,2
2,C,3


In [89]:
ser = pd.Series(['a','b','c','d','e','f'],
                index=[['A','B','C']*2,[1,2,3,4,5,6]],
                )

In [90]:
ser.index.names = ['Let','Num']

In [91]:
ser

Let  Num
A    1      a
B    2      b
C    3      c
A    4      d
B    5      e
C    6      f
dtype: object

In [92]:
ser.sort_index(inplace=True)

In [93]:
mydata = ser.reset_index('Num',inplace=False)

In [94]:
import numpy as np
import pandas as pd

In [95]:
data = pd.Series(np.random.randn(9),
                 index=[['a','a','a','b','b','c','c','d','d'],
                        [1,2,3,1,3,1,2,2,3]])

In [96]:
data

a  1   -0.623773
   2    1.375920
   3    0.797608
b  1    1.172852
   3    1.390023
c  1   -0.667493
   2   -0.370875
d  2   -0.168968
   3    0.633576
dtype: float64

In [97]:
data.index

MultiIndex([('a', 1),
            ('a', 2),
            ('a', 3),
            ('b', 1),
            ('b', 3),
            ('c', 1),
            ('c', 2),
            ('d', 2),
            ('d', 3)],
           )

In [98]:
data.index.levels

FrozenList([['a', 'b', 'c', 'd'], [1, 2, 3]])

In [99]:
data

a  1   -0.623773
   2    1.375920
   3    0.797608
b  1    1.172852
   3    1.390023
c  1   -0.667493
   2   -0.370875
d  2   -0.168968
   3    0.633576
dtype: float64

In [100]:
type(data.d)

pandas.core.series.Series

In [101]:
data['b':'c']

b  1    1.172852
   3    1.390023
c  1   -0.667493
   2   -0.370875
dtype: float64

In [102]:
data.iloc[1]

1.3759199561056406

In [103]:
data['b',1]

1.1728519180109704

In [104]:
data.loc[:,2]

a    1.375920
c   -0.370875
d   -0.168968
dtype: float64

In [105]:
data.unstack()

Unnamed: 0,1,2,3
a,-0.623773,1.37592,0.797608
b,1.172852,,1.390023
c,-0.667493,-0.370875,
d,,-0.168968,0.633576


In [106]:
data.unstack().stack()

a  1   -0.623773
   2    1.375920
   3    0.797608
b  1    1.172852
   3    1.390023
c  1   -0.667493
   2   -0.370875
d  2   -0.168968
   3    0.633576
dtype: float64

In [107]:
data.unstack(0)

Unnamed: 0,a,b,c,d
1,-0.623773,1.172852,-0.667493,
2,1.37592,,-0.370875,-0.168968
3,0.797608,1.390023,,0.633576


In [108]:
mydata = pd.Series(np.random.randn(12),
                   index=[['a']*4+['b']*4+['c']*4,
                          ([1]*2+[2]*2)*3,
                          ['x','y']*6])

In [109]:
mydata

a  1  x    0.110867
      y    0.251473
   2  x   -1.724244
      y   -2.053780
b  1  x    0.418893
      y   -0.323750
   2  x    2.164626
      y   -1.126058
c  1  x    0.631082
      y   -0.909706
   2  x    1.410472
      y   -0.192186
dtype: float64

In [110]:
mydata.unstack()

Unnamed: 0,Unnamed: 1,x,y
a,1,0.110867,0.251473
a,2,-1.724244,-2.05378
b,1,0.418893,-0.32375
b,2,2.164626,-1.126058
c,1,0.631082,-0.909706
c,2,1.410472,-0.192186


In [111]:
mydata.unstack([2,1]).sort_index(axis=1)

Unnamed: 0_level_0,x,x,y,y
Unnamed: 0_level_1,1,2,1,2
a,0.110867,-1.724244,0.251473,-2.05378
b,0.418893,2.164626,-0.32375,-1.126058
c,0.631082,1.410472,-0.909706,-0.192186


In [112]:
frame = pd.DataFrame(np.arange(12).reshape((4,3)),
                     index=[['a','a','b','b'],
                            [1,2,1,2]],
                     columns=[['Ohio','Ohio','Colorado'],
                              ['Green','Red','Green']])

In [113]:
frame

Unnamed: 0_level_0,Unnamed: 1_level_0,Ohio,Ohio,Colorado
Unnamed: 0_level_1,Unnamed: 1_level_1,Green,Red,Green
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [114]:
frame.Ohio

Unnamed: 0,Unnamed: 1,Green,Red
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [115]:
frame.columns.names=['state','color']

In [116]:
frame.index.names = ['key1','key2']

In [117]:
frame

Unnamed: 0_level_0,state,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [118]:
frame.Ohio

Unnamed: 0_level_0,color,Green,Red
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
a,1,0,1
a,2,3,4
b,1,6,7
b,2,9,10


In [119]:
my_multi_index = pd.MultiIndex.from_arrays([['Ohio','Ohio','Colorado'],['Green','Red','Green']],
                          names=['State','color'])

In [120]:
frame.columns

MultiIndex([(    'Ohio', 'Green'),
            (    'Ohio',   'Red'),
            ('Colorado', 'Green')],
           names=['state', 'color'])

In [121]:
frame.columns = my_multi_index

In [122]:
frame

Unnamed: 0_level_0,State,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [123]:
frame.swaplevel('key1','key2').sort_index()

Unnamed: 0_level_0,State,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key2,key1,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1,a,0,1,2
1,b,6,7,8
2,a,3,4,5
2,b,9,10,11


In [124]:
frame

Unnamed: 0_level_0,State,Ohio,Ohio,Colorado
Unnamed: 0_level_1,color,Green,Red,Green
key1,key2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
a,1,0,1,2
a,2,3,4,5
b,1,6,7,8
b,2,9,10,11


In [125]:
frame = pd.DataFrame({'a':range(7),'b':range(7,0,-1),
                      'c':['one']*3+['two']*4,
                      'd':[0,1,2,0,1,2,3]})

In [126]:
frame

Unnamed: 0,a,b,c,d
0,0,7,one,0
1,1,6,one,1
2,2,5,one,2
3,3,4,two,0
4,4,3,two,1
5,5,2,two,2
6,6,1,two,3


In [127]:
frame2 = frame.set_index(['c','d'])

In [128]:
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [129]:
frame.set_index(['c','d'],drop=False,append=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b,c,d
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,0,0,7,one,0
one,1,1,6,one,1
one,2,2,5,one,2
two,0,3,4,two,0
two,1,4,3,two,1
two,2,5,2,two,2
two,3,6,1,two,3


In [130]:
frame2.reset_index()

Unnamed: 0,c,d,a,b
0,one,0,0,7
1,one,1,1,6
2,one,2,2,5
3,two,0,3,4
4,two,1,4,3
5,two,2,5,2
6,two,3,6,1


In [131]:
frame3 = frame2

In [132]:
frame3

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [133]:
frame.index

RangeIndex(start=0, stop=7, step=1)

In [134]:
frame2.index

MultiIndex([('one', 0),
            ('one', 1),
            ('one', 2),
            ('two', 0),
            ('two', 1),
            ('two', 2),
            ('two', 3)],
           names=['c', 'd'])

In [135]:
frame2 = frame.set_index(['c','d'])

In [136]:
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [137]:
frame2.index

MultiIndex([('one', 0),
            ('one', 1),
            ('one', 2),
            ('two', 0),
            ('two', 1),
            ('two', 2),
            ('two', 3)],
           names=['c', 'd'])

In [138]:
frame2.columns

Index(['a', 'b'], dtype='object')

In [139]:
frame2['b'].one[1]

6

In [140]:
frame2

Unnamed: 0_level_0,Unnamed: 1_level_0,a,b
c,d,Unnamed: 2_level_1,Unnamed: 3_level_1
one,0,0,7
one,1,1,6
one,2,2,5
two,0,3,4
two,1,4,3
two,2,5,2
two,3,6,1


In [141]:
df1 = pd.DataFrame({'key':['b','b','a','c','a','a','b'],
                    'data1':range(7)})

In [142]:
df2 = pd.DataFrame({'key':['a','b','d'],
                    'data2':range(3)})

In [143]:
df1

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,a,5
6,b,6


In [144]:
df2

Unnamed: 0,key,data2
0,a,0
1,b,1
2,d,2


In [145]:
df1.merge(df2)

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [146]:
pd.merge(df1,df2)

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [147]:
pd.merge(df1,df2,on='key')

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,1,1
2,b,6,1
3,a,2,0
4,a,4,0
5,a,5,0


In [148]:
df3 = pd.DataFrame({'lkey':['b','b','a','c','a','a','b'],
                    'data1':range(7)})

In [149]:
df4 = pd.DataFrame({'rkey':['a','b','d'],
                    'data2':range(3)})

In [150]:
pd.merge(df3,df4,left_on='lkey',right_on='rkey',how='outer')

Unnamed: 0,lkey,data1,rkey,data2
0,b,0.0,b,1.0
1,b,1.0,b,1.0
2,b,6.0,b,1.0
3,a,2.0,a,0.0
4,a,4.0,a,0.0
5,a,5.0,a,0.0
6,c,3.0,,
7,,,d,2.0


In [151]:
df1 = pd.DataFrame({'key':['b','b','a','c','a','b'],
                    'data1':range(6)})

In [152]:
df2 = pd.DataFrame({'key':['a','b','a','b','d'],
                    'data2':range(5)})

In [153]:
df1

Unnamed: 0,key,data1
0,b,0
1,b,1
2,a,2
3,c,3
4,a,4
5,b,5


In [154]:
df2

Unnamed: 0,key,data2
0,a,0
1,b,1
2,a,2
3,b,3
4,d,4


In [155]:
pd.merge(df1,df2)

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,0,3
2,b,1,1
3,b,1,3
4,b,5,1
5,b,5,3
6,a,2,0
7,a,2,2
8,a,4,0
9,a,4,2


In [156]:
pd.merge(df1,df2,on='key',how='left')

Unnamed: 0,key,data1,data2
0,b,0,1.0
1,b,0,3.0
2,b,1,1.0
3,b,1,3.0
4,a,2,0.0
5,a,2,2.0
6,c,3,
7,a,4,0.0
8,a,4,2.0
9,b,5,1.0


In [157]:
pd.merge(df1,df2,how='inner')

Unnamed: 0,key,data1,data2
0,b,0,1
1,b,0,3
2,b,1,1
3,b,1,3
4,b,5,1
5,b,5,3
6,a,2,0
7,a,2,2
8,a,4,0
9,a,4,2


In [158]:
left = pd.DataFrame({'key1':['foo','foo','bar'],
                     'key2':['one','two','one'],
                     'lval':[1,2,3]})

In [159]:
right = pd.DataFrame({'key1':['foo','foo','bar','bar'],
                      'key2':['one','one','one','two'],
                      'rval':[4,5,6,7]})

In [160]:
pd.merge(left,right,on=['key1','key2'],how='outer')

Unnamed: 0,key1,key2,lval,rval
0,foo,one,1.0,4.0
1,foo,one,1.0,5.0
2,foo,two,2.0,
3,bar,one,3.0,6.0
4,bar,two,,7.0


In [161]:
pd.merge(left,right,on='key1')

Unnamed: 0,key1,key2_x,lval,key2_y,rval
0,foo,one,1,one,4
1,foo,one,1,one,5
2,foo,two,2,one,4
3,foo,two,2,one,5
4,bar,one,3,one,6
5,bar,one,3,two,7


In [162]:
pd.merge(left,right,on='key1',suffixes=('_left','_right'))

Unnamed: 0,key1,key2_left,lval,key2_right,rval
0,foo,one,1,one,4
1,foo,one,1,one,5
2,foo,two,2,one,4
3,foo,two,2,one,5
4,bar,one,3,one,6
5,bar,one,3,two,7


In [163]:
left = pd.DataFrame({'key':['a','b','a','a','b','c'],
                     'value':range(6)})

In [164]:
right = pd.DataFrame({'group_val':[3.5,7]},index=['a','b'])

In [165]:
left

Unnamed: 0,key,value
0,a,0
1,b,1
2,a,2
3,a,3
4,b,4
5,c,5


In [166]:
right

Unnamed: 0,group_val
a,3.5
b,7.0


In [167]:
pd.merge(left,right,left_on='key',right_index=True)

Unnamed: 0,key,value,group_val
0,a,0,3.5
2,a,2,3.5
3,a,3,3.5
1,b,1,7.0
4,b,4,7.0


In [168]:
pd.merge(left,right,left_on='key',right_index=True,how='outer')

Unnamed: 0,key,value,group_val
0,a,0,3.5
2,a,2,3.5
3,a,3,3.5
1,b,1,7.0
4,b,4,7.0
5,c,5,


In [169]:
lefth = pd.DataFrame({'key1':['Ohio','Ohio','Ohio','Nevada','Nevada'],
                      'key2':[2000,2001,2002,2001,2002],
                      'data':np.arange(5.)})

In [170]:
righth = pd.DataFrame(np.arange(12).reshape((6,2)),
                      index = [['Nevada','Nevada','Ohio','Ohio','Ohio','Ohio'],
                               [2001,2000,2000,2000,2001,2002]],
                      columns = ['event1','event2'])

In [171]:
lefth

Unnamed: 0,key1,key2,data
0,Ohio,2000,0.0
1,Ohio,2001,1.0
2,Ohio,2002,2.0
3,Nevada,2001,3.0
4,Nevada,2002,4.0


In [172]:
righth

Unnamed: 0,Unnamed: 1,event1,event2
Nevada,2001,0,1
Nevada,2000,2,3
Ohio,2000,4,5
Ohio,2000,6,7
Ohio,2001,8,9
Ohio,2002,10,11


In [173]:
pd.merge(lefth,righth,left_on=['key1','key2'],right_index=True)

Unnamed: 0,key1,key2,data,event1,event2
0,Ohio,2000,0.0,4,5
0,Ohio,2000,0.0,6,7
1,Ohio,2001,1.0,8,9
2,Ohio,2002,2.0,10,11
3,Nevada,2001,3.0,0,1


In [174]:
pd.merge(lefth,righth,left_on=['key1','key2'],right_index=True,how='outer')

Unnamed: 0,key1,key2,data,event1,event2
0,Ohio,2000,0.0,4.0,5.0
0,Ohio,2000,0.0,6.0,7.0
1,Ohio,2001,1.0,8.0,9.0
2,Ohio,2002,2.0,10.0,11.0
3,Nevada,2001,3.0,0.0,1.0
4,Nevada,2002,4.0,,
4,Nevada,2000,,2.0,3.0


In [175]:
left2 = pd.DataFrame([[1.,2.],[3.,4.],[5.,6.]],
                     index = ['a','c','e'],
                     columns = ['Ohio','Nevada'])

In [176]:
right2 = pd.DataFrame([[7.,8.],[9.,10.],[11.,12.],[13,14]],
                      index = ['b','c','d','e'],
                      columns=['Missouri','Alabama'])

In [177]:
left2

Unnamed: 0,Ohio,Nevada
a,1.0,2.0
c,3.0,4.0
e,5.0,6.0


In [178]:
right2

Unnamed: 0,Missouri,Alabama
b,7.0,8.0
c,9.0,10.0
d,11.0,12.0
e,13.0,14.0


In [179]:
pd.merge(left2,right2,how='outer',left_index=True,right_index=True)

Unnamed: 0,Ohio,Nevada,Missouri,Alabama
a,1.0,2.0,,
b,,,7.0,8.0
c,3.0,4.0,9.0,10.0
d,,,11.0,12.0
e,5.0,6.0,13.0,14.0


In [180]:
left2.join(right2,how='outer')

Unnamed: 0,Ohio,Nevada,Missouri,Alabama
a,1.0,2.0,,
b,,,7.0,8.0
c,3.0,4.0,9.0,10.0
d,,,11.0,12.0
e,5.0,6.0,13.0,14.0


In [181]:
left.join(right,on='key')

Unnamed: 0,key,value,group_val
0,a,0,3.5
1,b,1,7.0
2,a,2,3.5
3,a,3,3.5
4,b,4,7.0
5,c,5,


In [182]:
left

Unnamed: 0,key,value
0,a,0
1,b,1
2,a,2
3,a,3
4,b,4
5,c,5


In [183]:
right

Unnamed: 0,group_val
a,3.5
b,7.0


In [184]:
another = pd.DataFrame([[7,8],[9,10],[11,12],[16,17]],
                       index = ['a','c','e','f'],
                       columns = ['New York','Nevada'])

In [185]:
another

Unnamed: 0,New York,Nevada
a,7,8
c,9,10
e,11,12
f,16,17


In [186]:
left2

Unnamed: 0,Ohio,Nevada
a,1.0,2.0
c,3.0,4.0
e,5.0,6.0


In [187]:
left2.join([right2,another])

ValueError: Indexes have overlapping values: Index(['Nevada'], dtype='object')

In [188]:
left2.join(another,how='outer',sort=True,lsuffix='_left',validate='1:1')

Unnamed: 0,Ohio,Nevada_left,New York,Nevada
a,1.0,2.0,7,8
c,3.0,4.0,9,10
e,5.0,6.0,11,12
f,,,16,17


In [189]:
arr = np.arange(12).reshape((3,4))

In [190]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [191]:
np.concatenate([arr,arr],axis=1)

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

In [192]:
s1 = pd.Series([0,1],index=['a','b'])
s2 = pd.Series([2,3,4],index=['c','d','e'])
s3 = pd.Series([5,6],index=['f','g'])

In [193]:
pd.concat([s1,s2,s3])

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [194]:
pd.concat([s1,s2,s3],axis=1)

Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [195]:
s4 = pd.concat([s1,s3])

In [196]:
s4

a    0
b    1
f    5
g    6
dtype: int64

In [197]:
pd.concat([s1,s4],axis=1)

Unnamed: 0,0,1
a,0.0,0
b,1.0,1
f,,5
g,,6


In [198]:
pd.concat([s1,s4],axis=1,join='inner')

Unnamed: 0,0,1
a,0,0
b,1,1


In [199]:
pd.concat([s1,s4],axis=1,join_axes=['a','c','b','e'],)

TypeError: concat() got an unexpected keyword argument 'join_axes'

In [200]:
import sys

In [201]:
print(sys.version)
print(sys.executable)

3.11.4 (tags/v3.11.4:d2340ef, Jun  7 2023, 05:45:37) [MSC v.1934 64 bit (AMD64)]
C:\Users\HUAWEI\AppData\Local\Programs\Python\Python311\python.exe


In [202]:
result = pd.concat([s1,s1,s3],keys=['one','two','three'])

In [203]:
result

one    a    0
       b    1
two    a    0
       b    1
three  f    5
       g    6
dtype: int64

In [204]:
result.unstack()

Unnamed: 0,a,b,f,g
one,0.0,1.0,,
two,0.0,1.0,,
three,,,5.0,6.0


In [205]:
pd.concat([s1,s2,s3],axis=1,keys=['one','two','three'])

Unnamed: 0,one,two,three
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [206]:
df1 = pd.DataFrame(np.arange(6).reshape((3,2)),
                   index = ['a','b','c'],
                   columns = ['one','two'])

In [207]:
df2 = pd.DataFrame(5+ np.arange(4).reshape((2,2)),index=['a','c'],
                   columns=['three','four'])

In [208]:
df1

Unnamed: 0,one,two
a,0,1
b,2,3
c,4,5


In [209]:
df2

Unnamed: 0,three,four
a,5,6
c,7,8


In [210]:
pd.concat([df1,df2],axis=1,keys=['level1','level2'])

Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [211]:
pd.concat({'level1':df1,'level2':df2},axis=1)

Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [212]:
pd.concat([df1,df2],axis=1,keys=['level1','level2'],names=['upper','lower'],ignore_index=False)

upper,level1,level1,level2,level2
lower,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [213]:
df1 = pd.DataFrame(np.random.randn(3,4),columns=['a','b','c','d'])
df2 = pd.DataFrame(np.random.randn(2,3),columns=['b','d','a'])

In [214]:
df1

Unnamed: 0,a,b,c,d
0,1.081907,0.387477,-0.296244,-0.631229
1,-0.238916,0.57013,-0.995781,0.676979
2,-0.03111,-0.231282,1.193965,0.05127


In [215]:
df2

Unnamed: 0,b,d,a
0,0.624222,-0.196607,-2.022402
1,-0.028319,-0.018862,-1.329572


In [216]:
pd.concat([df1,df2],ignore_index=True,)

Unnamed: 0,a,b,c,d
0,1.081907,0.387477,-0.296244,-0.631229
1,-0.238916,0.57013,-0.995781,0.676979
2,-0.03111,-0.231282,1.193965,0.05127
3,-2.022402,0.624222,,-0.196607
4,-1.329572,-0.028319,,-0.018862


In [217]:
arr = np.arange(12).reshape((3,4))

In [218]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [219]:
np.concatenate([arr,arr],axis=1)

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

In [220]:
s1

a    0
b    1
dtype: int64

In [221]:
s2

c    2
d    3
e    4
dtype: int64

In [222]:
s3

f    5
g    6
dtype: int64

In [223]:
pd.concat([s1,s2,s3],axis=1)

Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [224]:
pd.concat([s1,s2,s3])

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [225]:
s4

a    0
b    1
f    5
g    6
dtype: int64

In [226]:
s1

a    0
b    1
dtype: int64

In [227]:
pd.concat([s1,s4],axis=1,join='inner')

Unnamed: 0,0,1
a,0,0
b,1,1


In [228]:
pd.concat([s1,s2,s3],keys=['one','two','three'])

one    a    0
       b    1
two    c    2
       d    3
       e    4
three  f    5
       g    6
dtype: int64

In [229]:
pd.concat([s1,s2,s3],keys=['one','two','three'],axis=1)

Unnamed: 0,one,two,three
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [230]:
result = pd.concat([s1,s2,s3],keys=['one','two','three'])

In [231]:
result

one    a    0
       b    1
two    c    2
       d    3
       e    4
three  f    5
       g    6
dtype: int64

In [232]:
result.unstack(level=0)

Unnamed: 0,one,two,three
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [233]:
df1

Unnamed: 0,a,b,c,d
0,1.081907,0.387477,-0.296244,-0.631229
1,-0.238916,0.57013,-0.995781,0.676979
2,-0.03111,-0.231282,1.193965,0.05127


In [234]:
df1 = pd.DataFrame(np.arange(6).reshape((3,2)),
                   index = ['a','b','c'],
                   columns = ['one','two'])

In [235]:
df2 = pd.DataFrame(np.arange(4).reshape((2,2)),
                   index = ['a','c'],
                   columns = ['three','four'])

In [236]:
df1

Unnamed: 0,one,two
a,0,1
b,2,3
c,4,5


In [237]:
df2

Unnamed: 0,three,four
a,0,1
c,2,3


In [238]:
pd.concat([df1,df2])

Unnamed: 0,one,two,three,four
a,0.0,1.0,,
b,2.0,3.0,,
c,4.0,5.0,,
a,,,0.0,1.0
c,,,2.0,3.0


In [239]:
pd.concat([df1,df2],axis=1,keys=['level1','level2'])

Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,0.0,1.0
b,2,3,,
c,4,5,2.0,3.0


In [240]:
pd.concat({'level1':df1,'level2':df2},axis=1)

Unnamed: 0_level_0,level1,level1,level2,level2
Unnamed: 0_level_1,one,two,three,four
a,0,1,0.0,1.0
b,2,3,,
c,4,5,2.0,3.0


In [241]:
pd.concat([df1,df2],axis=1,keys=['level1','level2'],names=['upper','lower'])

upper,level1,level1,level2,level2
lower,one,two,three,four
a,0,1,0.0,1.0
b,2,3,,
c,4,5,2.0,3.0


In [242]:
df1 = pd.DataFrame(np.random.randn(3,4),columns=['a','b','c','d'])

In [243]:
df2 = pd.DataFrame(np.random.randn(2,3),columns=['b','d','a'])

In [244]:
pd.concat([df1,df2])

Unnamed: 0,a,b,c,d
0,-0.270768,0.254694,-0.80714,0.490033
1,-0.662266,-1.755306,1.616331,-0.446204
2,1.247232,0.064501,-1.069396,-0.005881
0,-0.952275,-0.370396,,0.546469
1,0.393734,-0.917872,,1.24916


In [245]:
pd.concat([df1,df2],ignore_index=True)

Unnamed: 0,a,b,c,d
0,-0.270768,0.254694,-0.80714,0.490033
1,-0.662266,-1.755306,1.616331,-0.446204
2,1.247232,0.064501,-1.069396,-0.005881
3,-0.952275,-0.370396,,0.546469
4,0.393734,-0.917872,,1.24916


In [246]:
df1.index=pd.Index(['one','two','three']) 

In [247]:
df2.index = pd.Index(['one','five'])

In [248]:
df1

Unnamed: 0,a,b,c,d
one,-0.270768,0.254694,-0.80714,0.490033
two,-0.662266,-1.755306,1.616331,-0.446204
three,1.247232,0.064501,-1.069396,-0.005881


In [249]:
df2

Unnamed: 0,b,d,a
one,-0.370396,0.546469,-0.952275
five,-0.917872,1.24916,0.393734


In [250]:
pd.concat([df1,df2])

Unnamed: 0,a,b,c,d
one,-0.270768,0.254694,-0.80714,0.490033
two,-0.662266,-1.755306,1.616331,-0.446204
three,1.247232,0.064501,-1.069396,-0.005881
one,-0.952275,-0.370396,,0.546469
five,0.393734,-0.917872,,1.24916


In [251]:
pd.concat([df1,df2],ignore_index=True)

Unnamed: 0,a,b,c,d
0,-0.270768,0.254694,-0.80714,0.490033
1,-0.662266,-1.755306,1.616331,-0.446204
2,1.247232,0.064501,-1.069396,-0.005881
3,-0.952275,-0.370396,,0.546469
4,0.393734,-0.917872,,1.24916


In [252]:
pd.concat([df1,df2],verify_integrity=False,keys=['1st','2nd'],axis=0,levels=)

SyntaxError: invalid syntax (3686732284.py, line 1)

In [253]:
mydf1 = pd.DataFrame(np.random.randn(4,2),
                     index = [['a','a','b','b'],[1,2,1,2]],
                     columns = ['val1','val2'])

In [254]:
mydf2 = pd.DataFrame(np.random.randn(4,2),
                     index = [['c','c','d','d'],[1,2,1,2]],
                     columns = ['val1','val2'])

In [255]:
mydf1

Unnamed: 0,Unnamed: 1,val1,val2
a,1,-1.866839,0.780247
a,2,-0.360854,-0.164077
b,1,-0.429882,-0.301465
b,2,0.194139,-1.674771


In [256]:
mydf2

Unnamed: 0,Unnamed: 1,val1,val2
c,1,-0.227366,0.642964
c,2,0.596588,-0.683178
d,1,-1.886024,-0.682911
d,2,-1.293175,-0.993232


In [257]:
pd.concat([mydf1,mydf2],keys=['A','B'],
          axis=1,
          levels=[['A','B','C']])

Unnamed: 0_level_0,Unnamed: 1_level_0,A,A,B,B
Unnamed: 0_level_1,Unnamed: 1_level_1,val1,val2,val1,val2
a,1,-1.866839,0.780247,,
a,2,-0.360854,-0.164077,,
b,1,-0.429882,-0.301465,,
b,2,0.194139,-1.674771,,
c,1,,,-0.227366,0.642964
c,2,,,0.596588,-0.683178
d,1,,,-1.886024,-0.682911
d,2,,,-1.293175,-0.993232


In [258]:
mydf1.index

MultiIndex([('a', 1),
            ('a', 2),
            ('b', 1),
            ('b', 2)],
           )

In [259]:
mydf2.index

MultiIndex([('c', 1),
            ('c', 2),
            ('d', 1),
            ('d', 2)],
           )

In [260]:
a = pd.Series([np.nan,2.5,0,3.5,4.5,np.nan],
              index = list('fedcba'))

In [261]:
b = pd.Series([0,np.nan,2,np.nan,np.nan,5],
              index = ['a','b','c','d','e','f'])

In [262]:
a

f    NaN
e    2.5
d    0.0
c    3.5
b    4.5
a    NaN
dtype: float64

In [263]:
b

a    0.0
b    NaN
c    2.0
d    NaN
e    NaN
f    5.0
dtype: float64

In [264]:
np.where(pd.isnull(a),b,a)

array([0. , 2.5, 0. , 3.5, 4.5, 5. ])

In [265]:
b.combine_first(a)

a    0.0
b    4.5
c    2.0
d    0.0
e    2.5
f    5.0
dtype: float64

In [266]:
df1 = pd.DataFrame({'a':[1,np.nan,5,np.nan],
                    'b':[np.nan,2,np.nan,6],
                    'c':range(2,18,4)})

In [267]:
df1

Unnamed: 0,a,b,c
0,1.0,,2
1,,2.0,6
2,5.0,,10
3,,6.0,14


In [268]:
df2 = pd.DataFrame({'a':[5,4,np.nan,3,7],
                    'b':[np.nan,3,4,6,8]})

In [269]:
df2

Unnamed: 0,a,b
0,5.0,
1,4.0,3.0
2,,4.0
3,3.0,6.0
4,7.0,8.0


In [270]:
df1.combine_first(df2)

Unnamed: 0,a,b,c
0,1.0,,2.0
1,4.0,2.0,6.0
2,5.0,4.0,10.0
3,3.0,6.0,14.0
4,7.0,8.0,


In [271]:
data = pd.DataFrame(np.arange(6).reshape((2,3)),
                    index = pd.Index(['Ohio','Colorado'],name='state'),
                    columns = pd.Index(['one','two','three'],name='number'))

In [272]:
data

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [273]:
result = data.stack()

In [274]:
result

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int32

In [275]:
result.unstack()

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [276]:
result.unstack(0)

state,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


In [277]:
s1 = pd.Series([0,1,2,3],index=['a','b','c','d'])

In [278]:
s2 = pd.Series([4,5,6],index=['c','d','e'])

In [279]:
data2 = pd.concat([s1,s2],keys=['one','two'])

In [280]:
data2

one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64

In [281]:
data2.unstack()

Unnamed: 0,a,b,c,d,e
one,0.0,1.0,2.0,3.0,
two,,,4.0,5.0,6.0


In [282]:
data2.unstack().stack(dropna=False)

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
two  a    NaN
     b    NaN
     c    4.0
     d    5.0
     e    6.0
dtype: float64

In [283]:
df = pd.DataFrame({'left':result,'right':result+5},
                  columns = pd.Index(['left','right'],name='side'))

In [284]:
df

Unnamed: 0_level_0,side,left,right
state,number,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,one,0,5
Ohio,two,1,6
Ohio,three,2,7
Colorado,one,3,8
Colorado,two,4,9
Colorado,three,5,10


In [285]:
df.unstack(0)

side,left,left,right,right
state,Ohio,Colorado,Ohio,Colorado
number,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
one,0,3,5,8
two,1,4,6,9
three,2,5,7,10


In [286]:
df.unstack('state')

side,left,left,right,right
state,Ohio,Colorado,Ohio,Colorado
number,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
one,0,3,5,8
two,1,4,6,9
three,2,5,7,10


In [287]:
df.unstack('state').stack('side')

Unnamed: 0_level_0,state,Colorado,Ohio
number,side,Unnamed: 2_level_1,Unnamed: 3_level_1
one,left,3,0
one,right,8,5
two,left,4,1
two,right,9,6
three,left,5,2
three,right,10,7


In [288]:
data = pd.read_csv('examples/macrodata.csv')

In [289]:
data

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959,1,2710.349,1707.4,286.898,470.045,1886.9,28.980,139.7,2.82,5.8,177.146,0.00,0.00
1,1959,2,2778.801,1733.7,310.859,481.301,1919.7,29.150,141.7,3.08,5.1,177.830,2.34,0.74
2,1959,3,2775.488,1751.8,289.226,491.260,1916.4,29.350,140.5,3.82,5.3,178.657,2.74,1.09
3,1959,4,2785.204,1753.7,299.356,484.052,1931.3,29.370,140.0,4.33,5.6,179.386,0.27,4.06
4,1960,1,2847.699,1770.5,331.722,462.199,1955.5,29.540,139.6,3.50,5.2,180.007,2.31,1.19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,2008,3,13324.600,9267.7,1990.693,991.551,9838.3,216.889,1474.7,1.17,6.0,305.270,-3.16,4.33
199,2008,4,13141.920,9195.3,1857.661,1007.273,9920.4,212.174,1576.5,0.12,6.9,305.952,-8.79,8.91
200,2009,1,12925.410,9209.2,1558.494,996.287,9926.4,212.671,1592.8,0.22,8.1,306.547,0.94,-0.71
201,2009,2,12901.504,9189.0,1456.678,1023.528,10077.5,214.469,1653.6,0.18,9.2,307.226,3.37,-3.19


In [290]:
periods = pd.PeriodIndex(year=data.year,quarter=data.quarter,name='date')

In [291]:
columns = pd.Index(['realgdp','infl','unemp'],name='item')

In [292]:
data = data.reindex(columns=columns)

In [293]:
data.index = periods.to_timestamp('D','end')

In [294]:
ldata = data.stack().reset_index().rename(columns={0:'value'})

In [295]:
ldata[:10]

Unnamed: 0,date,item,value
0,1959-03-31 23:59:59.999999999,realgdp,2710.349
1,1959-03-31 23:59:59.999999999,infl,0.0
2,1959-03-31 23:59:59.999999999,unemp,5.8
3,1959-06-30 23:59:59.999999999,realgdp,2778.801
4,1959-06-30 23:59:59.999999999,infl,2.34
5,1959-06-30 23:59:59.999999999,unemp,5.1
6,1959-09-30 23:59:59.999999999,realgdp,2775.488
7,1959-09-30 23:59:59.999999999,infl,2.74
8,1959-09-30 23:59:59.999999999,unemp,5.3
9,1959-12-31 23:59:59.999999999,realgdp,2785.204


In [296]:
data

item,realgdp,infl,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31 23:59:59.999999999,2710.349,0.00,5.8
1959-06-30 23:59:59.999999999,2778.801,2.34,5.1
1959-09-30 23:59:59.999999999,2775.488,2.74,5.3
1959-12-31 23:59:59.999999999,2785.204,0.27,5.6
1960-03-31 23:59:59.999999999,2847.699,2.31,5.2
...,...,...,...
2008-09-30 23:59:59.999999999,13324.600,-3.16,6.0
2008-12-31 23:59:59.999999999,13141.920,-8.79,6.9
2009-03-31 23:59:59.999999999,12925.410,0.94,8.1
2009-06-30 23:59:59.999999999,12901.504,3.37,9.2


In [297]:
ldata = data.stack().reset_index().rename(columns={0:'value'})

In [298]:
pivoted = ldata.pivot(index='date',columns='item',values='value')

In [299]:
pivoted

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31 23:59:59.999999999,0.00,2710.349,5.8
1959-06-30 23:59:59.999999999,2.34,2778.801,5.1
1959-09-30 23:59:59.999999999,2.74,2775.488,5.3
1959-12-31 23:59:59.999999999,0.27,2785.204,5.6
1960-03-31 23:59:59.999999999,2.31,2847.699,5.2
...,...,...,...
2008-09-30 23:59:59.999999999,-3.16,13324.600,6.0
2008-12-31 23:59:59.999999999,-8.79,13141.920,6.9
2009-03-31 23:59:59.999999999,0.94,12925.410,8.1
2009-06-30 23:59:59.999999999,3.37,12901.504,9.2


In [300]:
ldata['value2'] = np.random.randn(len(ldata))

In [301]:
ldata

Unnamed: 0,date,item,value,value2
0,1959-03-31 23:59:59.999999999,realgdp,2710.349,-0.566017
1,1959-03-31 23:59:59.999999999,infl,0.000,0.002811
2,1959-03-31 23:59:59.999999999,unemp,5.800,0.583907
3,1959-06-30 23:59:59.999999999,realgdp,2778.801,0.352597
4,1959-06-30 23:59:59.999999999,infl,2.340,-0.041813
...,...,...,...,...
604,2009-06-30 23:59:59.999999999,infl,3.370,0.143164
605,2009-06-30 23:59:59.999999999,unemp,9.200,-1.072522
606,2009-09-30 23:59:59.999999999,realgdp,12990.341,1.481215
607,2009-09-30 23:59:59.999999999,infl,3.560,1.088173


In [302]:
pivoted = ldata.pivot(index='date',columns='item')

In [303]:
pivoted

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31 23:59:59.999999999,0.00,2710.349,5.8,0.002811,-0.566017,0.583907
1959-06-30 23:59:59.999999999,2.34,2778.801,5.1,-0.041813,0.352597,-0.646878
1959-09-30 23:59:59.999999999,2.74,2775.488,5.3,-0.105063,0.030996,-1.009828
1959-12-31 23:59:59.999999999,0.27,2785.204,5.6,-0.504939,0.047578,1.058418
1960-03-31 23:59:59.999999999,2.31,2847.699,5.2,-0.481687,0.488161,-1.356536
...,...,...,...,...,...,...
2008-09-30 23:59:59.999999999,-3.16,13324.600,6.0,-0.332165,-0.427252,-0.251765
2008-12-31 23:59:59.999999999,-8.79,13141.920,6.9,-0.608928,-2.020439,-1.311138
2009-03-31 23:59:59.999999999,0.94,12925.410,8.1,-0.383308,0.219118,1.693257
2009-06-30 23:59:59.999999999,3.37,12901.504,9.2,0.143164,0.712472,-1.072522


In [304]:
pivoted['value']

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31 23:59:59.999999999,0.00,2710.349,5.8
1959-06-30 23:59:59.999999999,2.34,2778.801,5.1
1959-09-30 23:59:59.999999999,2.74,2775.488,5.3
1959-12-31 23:59:59.999999999,0.27,2785.204,5.6
1960-03-31 23:59:59.999999999,2.31,2847.699,5.2
...,...,...,...
2008-09-30 23:59:59.999999999,-3.16,13324.600,6.0
2008-12-31 23:59:59.999999999,-8.79,13141.920,6.9
2009-03-31 23:59:59.999999999,0.94,12925.410,8.1
2009-06-30 23:59:59.999999999,3.37,12901.504,9.2


In [305]:
df = pd.DataFrame({'key':['foo','bar','baz'],
                   'A':[1,2,3],
                   'B':[4,5,6],
                   'C':[7,8,9]})

In [306]:
df

Unnamed: 0,key,A,B,C
0,foo,1,4,7
1,bar,2,5,8
2,baz,3,6,9


In [307]:
melted = pd.melt(df,['key'])

In [308]:
melted

Unnamed: 0,key,variable,value
0,foo,A,1
1,bar,A,2
2,baz,A,3
3,foo,B,4
4,bar,B,5
5,baz,B,6
6,foo,C,7
7,bar,C,8
8,baz,C,9


In [309]:
reshaped = melted.pivot(index='key',columns='variable',values='value')

In [310]:
reshaped

variable,A,B,C
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,2,5,8
baz,3,6,9
foo,1,4,7


In [311]:
pd.melt(df,id_vars=['key'],value_vars=['A','B'])

Unnamed: 0,key,variable,value
0,foo,A,1
1,bar,A,2
2,baz,A,3
3,foo,B,4
4,bar,B,5
5,baz,B,6


In [312]:
pd.melt(df,value_vars=['A','B','C'])

Unnamed: 0,variable,value
0,A,1
1,A,2
2,A,3
3,B,4
4,B,5
5,B,6
6,C,7
7,C,8
8,C,9


In [313]:
pd.melt(df,value_vars=['key','A','B'])

Unnamed: 0,variable,value
0,key,foo
1,key,bar
2,key,baz
3,A,1
4,A,2
5,A,3
6,B,4
7,B,5
8,B,6


In [314]:
df

Unnamed: 0,key,A,B,C
0,foo,1,4,7
1,bar,2,5,8
2,baz,3,6,9


In [315]:
df.melt(['key'])

Unnamed: 0,key,variable,value
0,foo,A,1
1,bar,A,2
2,baz,A,3
3,foo,B,4
4,bar,B,5
5,baz,B,6
6,foo,C,7
7,bar,C,8
8,baz,C,9


In [316]:
melted

Unnamed: 0,key,variable,value
0,foo,A,1
1,bar,A,2
2,baz,A,3
3,foo,B,4
4,bar,B,5
5,baz,B,6
6,foo,C,7
7,bar,C,8
8,baz,C,9


In [319]:
reshaped = melted.pivot(index='key',columns='variable')

In [320]:
reshaped

Unnamed: 0_level_0,value,value,value
variable,A,B,C
key,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
bar,2,5,8
baz,3,6,9
foo,1,4,7


In [321]:
reshaped.reset_index()

Unnamed: 0_level_0,key,value,value,value
variable,Unnamed: 1_level_1,A,B,C
0,bar,2,5,8
1,baz,3,6,9
2,foo,1,4,7


In [322]:
pd.melt(df,['key'],['A','B'])

Unnamed: 0,key,variable,value
0,foo,A,1
1,bar,A,2
2,baz,A,3
3,foo,B,4
4,bar,B,5
5,baz,B,6


In [323]:
pd.melt(df,value_vars=['A','B','C'])

Unnamed: 0,variable,value
0,A,1
1,A,2
2,A,3
3,B,4
4,B,5
5,B,6
6,C,7
7,C,8
8,C,9


In [326]:
pd.melt(df,id_vars=['key'],value_vars=['A','B','C'],value_name='VALUE',var_name='vars',col_level,)

Unnamed: 0,key,vars,VALUE
0,foo,A,1
1,bar,A,2
2,baz,A,3
3,foo,B,4
4,bar,B,5
5,baz,B,6
6,foo,C,7
7,bar,C,8
8,baz,C,9
