### Long Format Data

In [21]:
import pandas as pd
# Load the macrodata CSV into a DataFrame.
data = pd.read_csv("data/macrodata.csv")

# Build a PeriodIndex named "date" from the year and quarter columns.
# This has to happen before the reindex below, which drops both columns.
periods = pd.PeriodIndex(year=data["year"], quarter=data["quarter"], name="date")

# Keep only the three series of interest and name the column axis "item".
columns = pd.Index(["realgdp", "infl", "unemp"], name="item")
data = data.reindex(columns=columns)

# Use daily timestamps taken at the end of each period as the row index.
data.index = periods.to_timestamp(freq="D", how="end")
data.head()


item,realgdp,infl,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31,2710.349,0.0,5.8
1959-06-30,2778.801,2.34,5.1
1959-09-30,2775.488,2.74,5.3
1959-12-31,2785.204,0.27,5.6
1960-03-31,2847.699,2.31,5.2


In [22]:
# Long (a.k.a. stacked) format: each row holds exactly one observation,
# identified by its date and its item. A fixed (date, item, value) schema
# like this is easier to store in a relational database, because a new
# item becomes additional rows rather than an additional column.
long_data = (
    data.stack()
    .reset_index()
    .rename(columns={0: "value"})
)
long_data.head()

Unnamed: 0,date,item,value
0,1959-03-31,realgdp,2710.349
1,1959-03-31,infl,0.0
2,1959-03-31,unemp,5.8
3,1959-06-30,realgdp,2778.801
4,1959-06-30,infl,2.34


### Pivot

In [29]:
# pivot() needs three things: the column that becomes the row index, the
# column whose distinct values become the new column labels, and the column
# that fills the cells. They are passed by keyword because pandas 2.0 and
# later make every pivot() argument keyword-only; the keyword form also
# works on older versions.
long_data.pivot(index="date", columns="item", values="value").head()

item,infl,realgdp,unemp
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31,0.0,2710.349,5.8
1959-06-30,2.34,2778.801,5.1
1959-09-30,2.74,2775.488,5.3
1959-12-31,0.27,2785.204,5.6
1960-03-31,2.31,2847.699,5.2


### Pivot table with two value columns 

In [31]:
import numpy as np

# Add a second value column of standard-normal noise, one draw per row.
# A dedicated fixed-seed RandomState makes the column identical on every
# Restart & Run All, without touching NumPy's global random state.
long_data["value2"] = np.random.RandomState(12345).randn(len(long_data))
long_data.head()

Unnamed: 0,date,item,value,value2
0,1959-03-31,realgdp,2710.349,-0.675832
1,1959-03-31,infl,0.0,-0.354158
2,1959-03-31,unemp,5.8,-0.943724
3,1959-06-30,realgdp,2778.801,-0.152308
4,1959-06-30,infl,2.34,-0.236958


In [35]:
# Passing a list to `values` pivots both columns at once; the result has
# hierarchical columns with the value-column name on the outer level and
# the item on the inner level. All arguments are given by keyword because
# pandas 2.0 and later no longer accept positional arguments to pivot().
long_data.pivot(index="date", columns="item", values=["value", "value2"]).head()

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31,0.0,2710.349,5.8,-0.354158,-0.675832,-0.943724
1959-06-30,2.34,2778.801,5.1,-0.236958,-0.152308,-0.882157
1959-09-30,2.74,2775.488,5.3,-0.390892,-0.024654,-0.920335
1959-12-31,0.27,2785.204,5.6,0.67932,-0.366116,-0.728271
1960-03-31,2.31,2847.699,5.2,-0.424663,-0.232569,0.575816


### Unstack Vs Pivot

In [40]:
# pivot() is equivalent to moving "date" and "item" into a hierarchical
# row index with set_index() and then calling unstack() on the result.
long_data_h = long_data.set_index(keys=["date", "item"])
long_data_h.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,value,value2
date,item,Unnamed: 2_level_1,Unnamed: 3_level_1
1959-03-31,realgdp,2710.349,-0.675832
1959-03-31,infl,0.0,-0.354158
1959-03-31,unemp,5.8,-0.943724
1959-06-30,realgdp,2778.801,-0.152308
1959-06-30,infl,2.34,-0.236958


In [43]:
# Move the "item" level of the row index into the columns, leaving "date"
# as the only row-index level.
pivot_data = long_data_h.unstack(level="item")
pivot_data.head()

Unnamed: 0_level_0,value,value,value,value2,value2,value2
item,infl,realgdp,unemp,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
1959-03-31,0.0,2710.349,5.8,-0.354158,-0.675832,-0.943724
1959-06-30,2.34,2778.801,5.1,-0.236958,-0.152308,-0.882157
1959-09-30,2.74,2775.488,5.3,-0.390892,-0.024654,-0.920335
1959-12-31,0.27,2785.204,5.6,0.67932,-0.366116,-0.728271
1960-03-31,2.31,2847.699,5.2,-0.424663,-0.232569,0.575816


### Melt
- `melt` is the inverse of `pivot`: it reshapes wide-format columns back into long-format rows

In [69]:
# Rebuild the wide table first. Keyword arguments are required here because
# pandas 2.0 and later reject positional arguments to pivot().
pivot = long_data.pivot(index="date", columns="item", values="value")

# With no id_vars, melt() returns only the column label ("item") and the
# value; the "date" row index is not carried over.
pd.melt(pivot).head()

Unnamed: 0,item,value
0,infl,0.0
1,infl,2.34
2,infl,2.74
3,infl,0.27
4,infl,2.31


In [81]:
# Turn the "date" index back into an ordinary column and use it as the
# identifier variable, so every melted row keeps its date.
un_pivot = pivot.reset_index().melt(id_vars="date")
un_pivot.head()

Unnamed: 0,date,item,value
0,1959-03-31,infl,0.0
1,1959-06-30,infl,2.34
2,1959-09-30,infl,2.74
3,1959-12-31,infl,0.27
4,1960-03-31,infl,2.31


In [83]:
# Pivot the melted frame back to wide format. `values` is omitted, so the
# remaining column ("value") is used and appears as the outer column level.
# Keyword arguments are used because pandas 2.0 and later make pivot()
# keyword-only.
un_pivot.pivot(index="date", columns="item").head()

Unnamed: 0_level_0,value,value,value
item,infl,realgdp,unemp
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1959-03-31,0.0,2710.349,5.8
1959-06-30,2.34,2778.801,5.1
1959-09-30,2.74,2775.488,5.3
1959-12-31,0.27,2785.204,5.6
1960-03-31,2.31,2847.699,5.2
