In [1]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

## SeriesとDataFrameの変換

In [2]:
# 2x4 integer table (0..7) whose row axis is named 'city' and whose
# column axis is named 'letter'; the axis names are what stack()/unstack()
# carry over into the reshaped index levels below.
df1 = DataFrame(
    data=np.arange(8).reshape(2, 4),
    index=pd.Index(['LA', 'SP'], name='city'),
    columns=pd.Index(list('ABCD'), name='letter'),
)

In [3]:
df1

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SP,4,5,6,7


### DataFrame -> Series

In [4]:
# Fold the 'letter' column axis into an inner index level: the result is a
# Series indexed by (city, letter).
df1.stack(level='letter')

city  letter
LA    A         0
      B         1
      C         2
      D         3
SP    A         4
      B         5
      C         6
      D         7
dtype: int64

### Series -> DataFrame

In [5]:
# Round trip: unstacking the inner 'letter' level restores the original frame.
df1.stack().unstack(level='letter')

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SP,4,5,6,7


In [6]:
# Unstacking the outer 'city' level instead puts the cities on the columns,
# i.e. the transposed layout.
df1.stack().unstack(level='city')

city,LA,SP
letter,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0,4
B,1,5
C,2,6
D,3,7


In [7]:
# Two Series whose labels only partly overlap ('X' and 'Y' are shared),
# so that combining them later leaves gaps.
ser1 = Series([0, 1, 2], index=['Q', 'X', 'Y'])
ser2 = Series([4, 5, 6], index=['X', 'Y', 'Z'])

In [10]:
# Stack the two Series end to end; `keys` adds an outer index level that
# records which Series each value came from.
ser_df = pd.concat(
    objs=[ser1, ser2],
    keys=['Alpha', 'Beta'],
)

In [11]:
ser_df

Alpha  Q    0
       X    1
       Y    2
Beta   X    4
       Y    5
       Z    6
dtype: int64

In [12]:
# Move the inner index level (Q/X/Y/Z) onto the columns. Labels that exist in
# only one of the two Series show up as NaN, and the values become floats.
ser_df.unstack()

Unnamed: 0,Q,X,Y,Z
Alpha,0.0,1.0,2.0,
Beta,,4.0,5.0,6.0


null値はstack()時に取り除かれる

In [13]:
# Stack the wide frame back into a Series with default arguments; in the
# output shown below the NaN cells are gone and the dtype stays float64.
ser_df.unstack().stack()

Alpha  Q    0.0
       X    1.0
       Y    2.0
Beta   X    4.0
       Y    5.0
       Z    6.0
dtype: float64

null値を取り除きたくない場合

In [16]:
# dropna=False keeps the label pairs that have no value as NaN rows.
# NOTE(review): recent pandas releases deprecate `dropna` in favour of
# `future_stack=True`, which rejects this argument -- confirm against the
# pandas version this notebook targets.
ser_df.unstack().stack(dropna=False)

Alpha  Q    0.0
       X    1.0
       Y    2.0
       Z    NaN
Beta   Q    NaN
       X    4.0
       Y    5.0
       Z    6.0
dtype: float64

## ピボットテーブル
- テーブルデータを集約して別のindex/columnで表すこと

In [20]:
# Fixed seed so the sample data is identical on every Restart & Run All.
rng = np.random.RandomState(0)


def unpivot(frame):
    """Reshape a wide DataFrame into long (date, variable, value) records.

    Parameters
    ----------
    frame : DataFrame
        Wide table. Each row label becomes a 'date' entry and each column
        label becomes a 'variable' entry.

    Returns
    -------
    DataFrame
        One row per cell of ``frame``, ordered column by column, with the
        columns 'date', 'variable' and 'value' (in that order).
    """
    N, K = frame.shape
    data = {
        # Column-major ('F') flattening: every row of the first column,
        # then every row of the second column, and so on.
        'value': frame.values.ravel('F'),
        # Each column label repeated once per row, matching the order above.
        'variable': np.asarray(frame.columns).repeat(N),
        # The whole row index repeated once per column.
        'date': np.tile(np.asarray(frame.index), K),
    }
    return DataFrame(data, columns=['date', 'variable', 'value'])


# 3 business days x 4 columns (A-D) of standard-normal noise. Built by hand
# because pandas.util.testing (and its makeTimeDataFrame helper) was
# deprecated in pandas 1.0 and removed in 2.0.
wide_ts = DataFrame(rng.randn(3, 4),
                    index=pd.bdate_range('2000-01-03', periods=3),
                    columns=list('ABCD'))
df2 = unpivot(wide_ts)

In [21]:
df2

Unnamed: 0,date,variable,value
0,2000-01-03,A,-0.009037
1,2000-01-04,A,1.21402
2,2000-01-05,A,2.74387
3,2000-01-03,B,2.219102
4,2000-01-04,B,1.494606
5,2000-01-05,B,-1.378727
6,2000-01-03,C,-0.148019
7,2000-01-04,C,-0.731868
8,2000-01-05,C,0.518597
9,2000-01-03,D,0.892242


In [22]:
# Long -> wide: one row per date, one column per variable, cells taken from
# 'value'. Keyword arguments are used because DataFrame.pivot no longer
# accepts positional arguments as of pandas 2.0.
df2_piv = df2.pivot(index='date', columns='variable', values='value')

In [24]:
df2_piv

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,-0.009037,2.219102,-0.148019,0.892242
2000-01-04,1.21402,1.494606,-0.731868,0.185101
2000-01-05,2.74387,-1.378727,0.518597,-1.644344
