# pandas


In [1]:
import pandas as pd
import numpy as np

In [7]:
mser = pd.Series(np.random.rand(8),
                index=[['white','white','white','blue','blue','red','red','red'],
                      ['up','down','right','up','down','up','down','left']])
mser

white  up       0.590040
       down     0.891631
       right    0.393086
blue   up       0.258664
       down     0.100975
red    up       0.981230
       down     0.627297
       left     0.902027
dtype: float64

In [4]:
mser.index

MultiIndex([('white',    'up'),
            ('white',  'down'),
            ('white', 'right'),
            ( 'blue',    'up'),
            ( 'blue',  'down'),
            (  'red',    'up'),
            (  'red',  'down'),
            (  'red',  'left')],
           )

In [6]:
'''
mser.index
pd.MultiIndex(levels=[['blue','red','white'],['down','left','right','up']],
             labels=[[2,2,2,0,0,1,1,1],
                    [3,0,2,3,0,3,0,1]])
mser
'''

"\nmser.index\npd.MultiIndex(levels=[['blue','red','white'],['down','left','right','up']],\n             labels=[[2,2,2,0,0,1,1,1],\n                    [3,0,2,3,0,3,0,1]])\nmser\n"

In [8]:
mser['white']

up       0.590040
down     0.891631
right    0.393086
dtype: float64

In [9]:
mser[:,'up']

white    0.590040
blue     0.258664
red      0.981230
dtype: float64

In [10]:
mser['white','up']

0.590040458899633

In [11]:
mser

white  up       0.590040
       down     0.891631
       right    0.393086
blue   up       0.258664
       down     0.100975
red    up       0.981230
       down     0.627297
       left     0.902027
dtype: float64

In [12]:
mser.unstack()

Unnamed: 0,down,left,right,up
blue,0.100975,,,0.258664
red,0.627297,0.902027,,0.98123
white,0.891631,,0.393086,0.59004


In [14]:
mser.unstack().stack()

blue   down     0.100975
       up       0.258664
red    down     0.627297
       left     0.902027
       up       0.981230
white  down     0.891631
       right    0.393086
       up       0.590040
dtype: float64

In [15]:
mframe = pd.DataFrame(np.random.randn(16).reshape(4,4),
                     index=[['white','white','red','red'],['up','down','up','down']],
                     columns=[['pen','pen','paper','paper'],[1,2,1,2]])
mframe

Unnamed: 0_level_0,Unnamed: 1_level_0,pen,pen,paper,paper
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,1,2
white,up,-0.623448,0.214,-1.645826,-0.280838
white,down,-0.218799,-0.328961,-0.26522,0.489323
red,up,1.472147,-0.664966,0.74571,-0.184156
red,down,-0.840728,-0.532182,0.453933,0.689707


# 重新调整顺序和 层级排序

In [16]:
mframe.columns.names=['object','id']
mframe.index.names=['colors','status']
mframe

Unnamed: 0_level_0,object,pen,pen,paper,paper
Unnamed: 0_level_1,id,1,2,1,2
colors,status,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
white,up,-0.623448,0.214,-1.645826,-0.280838
white,down,-0.218799,-0.328961,-0.26522,0.489323
red,up,1.472147,-0.664966,0.74571,-0.184156
red,down,-0.840728,-0.532182,0.453933,0.689707


In [17]:
mframe.swaplevel('colors','status') #交换列后，显然不存在相同属性。于是不会合并了

Unnamed: 0_level_0,object,pen,pen,paper,paper
Unnamed: 0_level_1,id,1,2,1,2
status,colors,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
up,white,-0.623448,0.214,-1.645826,-0.280838
down,white,-0.218799,-0.328961,-0.26522,0.489323
up,red,1.472147,-0.664966,0.74571,-0.184156
down,red,-0.840728,-0.532182,0.453933,0.689707


> sort_index()根据指定的某个层级进行排序

In [18]:
mframe.sort_index(level='colors')

Unnamed: 0_level_0,object,pen,pen,paper,paper
Unnamed: 0_level_1,id,1,2,1,2
colors,status,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
red,down,-0.840728,-0.532182,0.453933,0.689707
red,up,1.472147,-0.664966,0.74571,-0.184156
white,down,-0.218799,-0.328961,-0.26522,0.489323
white,up,-0.623448,0.214,-1.645826,-0.280838


## 按层级统计数据

In [20]:
mframe.sum(level='colors') #相同color做为一个分组，在这个分组，对每列进行sum

object,pen,pen,paper,paper
id,1,2,1,2
colors,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
white,-0.842246,-0.114961,-1.911046,0.208485
red,0.63142,-1.197148,1.199643,0.50555


In [22]:
mframe.sum(level='id',axis=1) #按照相同的id作为分组，在分组中对每行进行sum

Unnamed: 0_level_0,id,1,2
colors,status,Unnamed: 2_level_1,Unnamed: 3_level_1
white,up,-2.269274,-0.066838
white,down,-0.484019,0.160361
red,up,2.217857,-0.849123
red,down,-0.386795,0.157525
