### Notes on reconstructing a `DataFrame` in `pandas`
File name: `pd_08_reshape.ipynb` <br>
Author: Xuhua Huang <br>
Last updated: September 13, 2022 <br>
Created on: September 13, 2022

In [1]:
import pandas as pd
import numpy as np

#### Calling `stack()` and `unstack()`

In [2]:
# Build a 2x3 frame with a named row axis ("state") and column axis ("number").
# The dtype is pinned to int32 so the result does not depend on the platform's
# default NumPy integer width.
data: pd.DataFrame = pd.DataFrame(
    data=np.arange(6, dtype=np.int32).reshape((2, 3)),
    index=pd.Index(['Ohio', 'Colorado'], name='state'),
    columns=pd.Index(['one', 'two', 'three'], name='number')
)
data

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [3]:
# Pivot the column level ("number") into the innermost level of the row index.
# Stacking a frame with single-level columns yields a Series, not a DataFrame,
# so the variable is annotated accordingly.
data_stacked: pd.Series = data.stack()
data_stacked

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int32

In [4]:
# Pivot the innermost index level back into columns, recovering the wide layout
data_stacked.unstack(level=-1)

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


#### `np.nan` introduced during `unstack()` (and kept by `stack(dropna=False)`)

In [5]:
# Two Series whose labels only partly overlap ('c' and 'd' are shared).
s1: pd.Series = pd.Series(data=[0, 1, 2, 3], index=['a', 'b', 'c', 'd'])
s2: pd.Series = pd.Series(data=[4, 5, 6], index=['c', 'd', 'e'])
# Concatenating Series along axis 0 with `keys` produces a single Series with a
# two-level index (key, original label) -- not a DataFrame.
data_concat: pd.Series = pd.concat([s1, s2], keys=['one', 'two'], axis=0)
data_concat

one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64

In [6]:
# Spread the inner labels across columns; (key, label) pairs that are absent
# from the input come back as NaN, and the values are displayed as floats.
data_concat.unstack(level=-1)

Unnamed: 0,a,b,c,d,e
one,0.0,1.0,2.0,3.0,
two,,,4.0,5.0,6.0


In [7]:
# Round trip: widen, then stack again while keeping the NaN rows that a
# default stack() would drop.
# NOTE(review): newer pandas releases deprecate `dropna` in favour of
# `future_stack=True` -- confirm against the installed pandas version.
(
    data_concat
    .unstack(level=-1)
    .stack(level=-1, dropna=False)
)

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
two  a    NaN
     b    NaN
     c    4.0
     d    5.0
     e    6.0
dtype: float64