## 将长透视为宽

In [1]:
import numpy as np
import pandas as pd

In [17]:
# Load the macroeconomic dataset from the local CSV file and preview its
# first rows.
data = pd.read_csv('macrodata.csv')
data.head()

Unnamed: 0,year,quarter,realgdp,realcons,realinv,realgovt,realdpi,cpi,m1,tbilrate,unemp,pop,infl,realint
0,1959.0,1.0,2710.349,1707.4,286.898,470.045,1886.9,28.98,139.7,2.82,5.8,177.146,0.0,0.0
1,1959.0,2.0,2778.801,1733.7,310.859,481.301,1919.7,29.15,141.7,3.08,5.1,177.83,2.34,0.74
2,1959.0,3.0,2775.488,1751.8,289.226,491.26,1916.4,29.35,140.5,3.82,5.3,178.657,2.74,1.09
3,1959.0,4.0,2785.204,1753.7,299.356,484.052,1931.3,29.37,140.0,4.33,5.6,179.386,0.27,4.06
4,1960.0,1.0,2847.699,1770.5,331.722,462.199,1955.5,29.54,139.6,3.5,5.2,180.007,2.31,1.19


In [18]:
# Combine the year and quarter columns into a PeriodIndex named 'date'.
# NOTE(review): pandas >= 2.2 deprecates the year=/quarter= constructor
# keywords in favor of PeriodIndex.from_fields -- confirm against the pandas
# version this notebook targets before changing.
periods = pd.PeriodIndex(year=data.year, quarter=data.quarter, name='date')

In [19]:
# Names of the indicator columns to keep. The index is itself named 'item',
# which becomes the label of the stacked level later on.
columns = pd.Index(
    data=['realgdp', 'infl', 'unemp'],
    name='item',
)

In [20]:
# Keep only the selected indicator columns; the named column index ('item')
# is carried over onto the frame.
data = data.reindex(columns=columns)

In [21]:
# Replace the row labels with daily-frequency timestamps taken at the end of
# each period.
data.index = periods.to_timestamp(freq='D', how='end')

In [22]:
# Reshape wide -> long: stack the item columns into rows, turn the
# (date, item) index back into ordinary columns, and name the stacked
# values column 'value'.
ldata = (
    data.stack()
    .reset_index()
    .rename(columns={0: 'value'})
)

In [23]:
# Show the first ten rows of the long-format table.
ldata.head(10)

Unnamed: 0,date,item,value
0,1959-03-31,realgdp,2710.349
1,1959-03-31,infl,0.0
2,1959-03-31,unemp,5.8
3,1959-06-30,realgdp,2778.801
4,1959-06-30,infl,2.34
5,1959-06-30,unemp,5.1
6,1959-09-30,realgdp,2775.488
7,1959-09-30,infl,2.74
8,1959-09-30,unemp,5.3
9,1959-12-31,realgdp,2785.204


In [24]:
# Intermediate step: stacking alone yields a Series indexed by (date, item).
data.stack().head()

date        item   
1959-03-31  realgdp    2710.349
            infl          0.000
            unemp         5.800
1959-06-30  realgdp    2778.801
            infl          2.340
dtype: float64

In [25]:
# Intermediate step: after reset_index the stacked values sit in a column
# labelled 0, which is why it is renamed to 'value' when building ldata.
data.stack().reset_index().head()

Unnamed: 0,date,item,0
0,1959-03-31,realgdp,2710.349
1,1959-03-31,infl,0.0
2,1959-03-31,unemp,5.8
3,1959-06-30,realgdp,2778.801
4,1959-06-30,infl,2.34


In [27]:
# Pivot the long table back to wide form: one row per date, one column per
# item, with cells filled from the 'value' column.
# Arguments are passed by keyword because DataFrame.pivot no longer accepts
# positional arguments in pandas >= 2.0; the keyword form works on older
# versions as well.
pivoted = ldata.pivot(index='date', columns='item', values='value')

In [29]:
# Preview the wide-format result (dates as rows, items as columns).
pivoted.head()

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31,0.0,2710.349,5.8
1959-06-30,2.34,2778.801,5.1
1959-09-30,2.74,2775.488,5.3
1959-12-31,0.27,2785.204,5.6
1960-03-31,2.31,2847.699,5.2


In [31]:
# Omitting values= pivots every remaining column, producing hierarchical
# columns (value-column name on top, item below).
# Keyword arguments are required by DataFrame.pivot in pandas >= 2.0.
ldata.pivot(index='date', columns='item').head()

Unnamed: 0_level_0,value,value,value
item,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1959-03-31,0.0,2710.349,5.8
1959-06-30,2.34,2778.801,5.1
1959-09-30,2.74,2775.488,5.3
1959-12-31,0.27,2785.204,5.6
1960-03-31,2.31,2847.699,5.2


### 2个数值列，同时进行重塑

In [33]:
# Add a second numeric column of random normal draws, one per row, so that
# two value columns can be reshaped together below.
ldata['value2'] = np.random.randn(ldata.shape[0])

In [35]:
# Show the first eight rows, now including the new 'value2' column.
ldata.head(8)

Unnamed: 0,date,item,value,value2
0,1959-03-31,realgdp,2710.349,0.371477
1,1959-03-31,infl,0.0,0.927848
2,1959-03-31,unemp,5.8,1.204697
3,1959-06-30,realgdp,2778.801,1.143803
4,1959-06-30,infl,2.34,-0.708622
5,1959-06-30,unemp,5.1,-0.729167
6,1959-09-30,realgdp,2775.488,1.39633
7,1959-09-30,infl,2.74,-0.126639


In [37]:
# With two value columns present, pivoting without values= reshapes both at
# once; the result has hierarchical columns keyed by (value column, item).
# Keyword arguments are required by DataFrame.pivot in pandas >= 2.0.
pivoted2 = ldata.pivot(index='date', columns='item')
pivoted2[:5]

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31,0.0,2710.349,5.8,0.927848,0.371477,1.204697
1959-06-30,2.34,2778.801,5.1,-0.708622,1.143803,-0.729167
1959-09-30,2.74,2775.488,5.3,-0.126639,1.39633,-0.12122
1959-12-31,0.27,2785.204,5.6,-0.748069,-2.118907,-1.956467
1960-03-31,2.31,2847.699,5.2,0.478339,1.763733,-2.001064


In [38]:
# Select the 'value' group from the top column level and show its first
# five rows.
pivoted2['value'].head(5)

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31,0.0,2710.349,5.8
1959-06-30,2.34,2778.801,5.1
1959-09-30,2.74,2775.488,5.3
1959-12-31,0.27,2785.204,5.6
1960-03-31,2.31,2847.699,5.2


In [40]:
# Same reshape without pivot: index the long table by (date, item), then
# move the 'item' level into the columns.
unstacked = (
    ldata
    .set_index(['date', 'item'])
    .unstack('item')
)
unstacked.head(7)

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31,0.0,2710.349,5.8,0.927848,0.371477,1.204697
1959-06-30,2.34,2778.801,5.1,-0.708622,1.143803,-0.729167
1959-09-30,2.74,2775.488,5.3,-0.126639,1.39633,-0.12122
1959-12-31,0.27,2785.204,5.6,-0.748069,-2.118907,-1.956467
1960-03-31,2.31,2847.699,5.2,0.478339,1.763733,-2.001064
1960-06-30,0.14,2834.39,5.2,2.576842,1.097742,-0.799284
1960-09-30,2.7,2839.022,5.6,1.754228,0.49857,-1.158778
