In [1]:
# Make Jupyter display the value of every top-level expression in a cell
# (not only the last one), so intermediate results show without print().
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

import pandas as pd

## 1. df-->stack-->reset_index-->pivot-->df

In [14]:
df = pd.DataFrame(
    data=[[0, 1], [2, 3]],
    index=['cat', 'dog'],
    columns=['weight', 'height'],
)

print(format('dataframe', '*^80'))
df

print(format('stacked dataframe', '*^80'))
df.stack()

print(format('stacked dataframe with index in column', '*^80'))
# stack() returns a Series, and a Series has no pivot_table
# (AttributeError: 'Series' object has no attribute 'pivot_table'), so it has
# to become a DataFrame before pivoting.  reset_index(level=1) does that while
# moving only the inner level into a column; without level=1, 'cat'/'dog'
# would not remain as the index.
stacked = df.stack().reset_index(level=1)
stacked

print(format('pivot_table revovered original dataframe (with extra name for columns)', '*^80'))
# pivot_table sorts the new columns alphabetically; naming the `values` column
# avoids a needless MultiIndex on the columns.
recovered_df1 = stacked.pivot_table(index=stacked.index, columns='level_1', values=0)
recovered_df1.columns.name = None  # remove the 'level_1' name from the columns
recovered_df1

***********************************dataframe************************************


Unnamed: 0,weight,height
cat,0,1
dog,2,3


*******************************stacked dataframe********************************


cat  weight    0
     height    1
dog  weight    2
     height    3
dtype: int64

*********************stacked dataframe with index in column*********************


Unnamed: 0,level_1,0
cat,weight,0
cat,height,1
dog,weight,2
dog,height,3


*****pivot_table revovered original dataframe (with extra name for columns)*****


Unnamed: 0,height,weight
cat,1,0
dog,3,2


In [31]:
# Rebuild the wide table from the long one with pivot_table:
#   index   -> row labels of the result (the 'cat'/'dog' index of `stacked`)
#   columns -> the column whose values become the new column labels; its name
#              ('level_1') is kept as the name of the resulting columns index
#   values  -> the column that fills the cells; the stacked values sit in a
#              column labelled 0 because they were never given a name
recovered_df1 = stacked.pivot_table(
    index=stacked.index,
    columns='level_1',
    values=0,
)
recovered_df1

level_1,height,weight
cat,1,0
dog,3,2


In [34]:
# Show the column labels of the recovered frame ('height', 'weight')
recovered_df1.columns

Index(['height', 'weight'], dtype='object', name='level_1')

In [35]:
# The columns Index carries a name; here it is 'level_1', taken from the
# column passed as `columns=` to pivot_table (a flat Index can have a name
# too, not only a MultiIndex)
recovered_df1.columns.name 

'level_1'

In [36]:
recovered_df1.columns.name = None  # drop the leftover 'level_1' name from the columns index

In [37]:
# Display again: the header no longer carries the 'level_1' label
recovered_df1

Unnamed: 0,height,weight
cat,1,0
dog,3,2


In [29]:
# After reset_index(level=1) only the outer level ('cat'/'dog') remains as the index
df.stack().reset_index(level=1).index

Index(['cat', 'cat', 'dog', 'dog'], dtype='object')

## 2. df-->stack-->convert to dataframe-->pivot-->df

In [54]:
# Build the stacked frame here so this section runs top to bottom on a fresh
# kernel (the later cell that prints it rebuilds the same frame).
stacked_df = pd.DataFrame(df.stack())

# get_level_values hands back a plain Index holding one level of the MultiIndex
type(stacked_df.index.get_level_values(0))

pandas.core.indexes.base.Index

In [55]:
# Stacking moved the column labels into an inner index level, giving a two-level MultiIndex
stacked_df.index

MultiIndex([('cat', 'weight'),
            ('cat', 'height'),
            ('dog', 'weight'),
            ('dog', 'height')],
           )

In [56]:
# get_level_values is a method of the index, not of the DataFrame, so go
# through .index; level 0 is the outer level ('cat'/'dog'), returned as a flat Index
stacked_df.index.get_level_values(0)

Index(['cat', 'cat', 'dog', 'dog'], dtype='object')

In [61]:
print(format('dataframe', '*^80'))
df

print(format('stack and convert to dataframe to expose pivot_table', '*^80'))
# A Series has no pivot_table, so wrap the stacked Series in a DataFrame first
stacked_df = pd.DataFrame(df.stack())
stacked_df

print(format('rather than unstack, pivot_table achieves the same', '*^80'))
# Pull each MultiIndex level out as a flat Index: outer level -> rows,
# inner level -> columns of the pivoted result
idx_lv0 = stacked_df.index.get_level_values(0)
idx_lv1 = stacked_df.index.get_level_values(1)

recovered_df2 = stacked_df.pivot_table(values=0, index=idx_lv0, columns=idx_lv1)
recovered_df2

***********************************dataframe************************************


Unnamed: 0,weight,height
cat,0,1
dog,2,3


**************stack and convert to dataframe to expose pivot_table**************


Unnamed: 0,Unnamed: 1,0
cat,weight,0
cat,height,1
dog,weight,2
dog,height,3


***************rather than unstack, pivot_table achieves the same***************


Unnamed: 0,height,weight
cat,1,0
dog,3,2


## 3. df-->melt-->add index-->pivot-->df

In [66]:
print('{:*^80}'.format('dataframe'))
df

print('{:*^80}'.format('melting loses index information'))
# melt puts the old column labels into a new "variable" column, whereas stack
# puts them into a new inner index level (same information, different place)
melted = df.melt()
melted

print('{:*^80}'.format('manually enrich index'))
# melt emits one column after another, each in the original row order, so the
# original index repeated once per column lines up with the melted rows.
# Deriving it from df (instead of hard-coding ['cat','dog']*2) keeps this
# correct if df gains rows or columns.
# Background: https://github.com/pandas-dev/pandas/issues/17440
# NOTE(review): newer pandas (>= 1.1) has df.melt(ignore_index=False), which
# keeps the index directly -- confirm the installed version before switching.
melted.index = list(df.index) * len(df.columns)
melted

print('{:*^80}'.format('pivot_table recovered original dataframe (with extra name for columns)'))
recovered_df3 = melted.pivot_table(index=melted.index, columns='variable', values='value')
recovered_df3.columns.name = None  # remove the leftover 'variable' name from the columns
recovered_df3

# melt drops the index, while pivot_table needs one to label the rows; hence
# the manual index restoration in this cell

***********************************dataframe************************************


Unnamed: 0,weight,height
cat,0,1
dog,2,3


************************melting loses index information*************************


Unnamed: 0,variable,value
0,weight,0
1,weight,2
2,height,1
3,height,3


*****************************manually enrich index******************************


Unnamed: 0,variable,value
cat,weight,0
dog,weight,2
cat,height,1
dog,height,3


*****pivot_table recovered original dataframe (with extra name for columns)*****


Unnamed: 0,height,weight
cat,1,0
dog,3,2
