# Rearranging and Reshaping Data

In [1]:
import pandas as pd
import numpy as np

## Pivoting Data Frames

In [23]:
# My observation: I initially expected pivot to behave like groupby, but it is different: pivot only reshapes the
# data and does not aggregate (pivot_table is the variant that aggregates).

In [17]:
# Adapted from https://pandas.pydata.org/pandas-docs/stable/reshaping.html
# The original recipe built its input with `pandas.util.testing`
# (`tm.N = 3; tm.makeTimeDataFrame()`). That module was deprecated in pandas 1.0
# and removed in pandas 2.0, so the same 3x4 frame is built with public API here.
def unpivot(frame):
    """Reshape a wide frame into long (date, variable, value) records.

    Parameters
    ----------
    frame : pandas.DataFrame
        Wide frame with N rows and K columns.

    Returns
    -------
    pandas.DataFrame
        N * K rows with the columns ``date``, ``variable`` and ``value``:
        ``date`` holds the index labels of ``frame``, ``variable`` the column
        labels, and ``value`` the corresponding cell.
    """
    N, K = frame.shape
    data = {
        # Column-major ('F') flattening: all of column 0, then column 1, ...
        'value': frame.values.ravel('F'),
        # Each column label repeated once per row so it lines up with 'value'.
        'variable': np.asarray(frame.columns).repeat(N),
        # The full index repeated once per column.
        'date': np.tile(np.asarray(frame.index), K),
    }
    return pd.DataFrame(data, columns=['date', 'variable', 'value'])


# Three consecutive business days (rows) x four random series A-D (columns),
# mirroring what `makeTimeDataFrame()` produced with N = 3.
wide = pd.DataFrame(np.random.randn(3, 4),
                    index=pd.bdate_range('2000-01-03', periods=3),
                    columns=list('ABCD'))
df = unpivot(wide)

In [22]:
# `info()` prints its summary and returns None, so call it on its own line
# rather than bundling it into a tuple. The bare last expression then shows
# the `describe()` table with the normal DataFrame display.
df.info()
df.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 3 columns):
date        12 non-null datetime64[ns]
variable    12 non-null object
value       12 non-null float64
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 368.0+ bytes


(None,            value
 count  12.000000
 mean   -0.213154
 std     0.915675
 min    -1.780551
 25%    -0.740781
 50%    -0.333942
 75%     0.508195
 max     1.099740)

In [19]:
# Peek at the first five rows of the long-format frame.
df.head(n=5)

Unnamed: 0,date,variable,value
0,2000-01-03,A,-0.643774
1,2000-01-04,A,0.150356
2,2000-01-05,A,0.607482
3,2000-01-03,B,-1.031804
4,2000-01-04,B,-0.424582


In [20]:
# One row per variable, one column per date.
df.pivot(
    index='variable',
    columns='date',
    values='value',
)

date,2000-01-03 00:00:00,2000-01-04 00:00:00,2000-01-05 00:00:00
variable,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,-0.643774,0.150356,0.607482
B,-1.031804,-0.424582,-0.243301
C,-1.270602,0.4751,-0.557622
D,1.09974,-1.780551,1.06171


In [24]:
# If `values` is not specified, pandas will pivot all remaining columns separately. In this case there is only one anyway.
df.pivot(
    index='date',
    columns='variable',
    values='value',
)

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,-0.643774,-1.031804,-1.270602,1.09974
2000-01-04,0.150356,-0.424582,0.4751,-1.780551
2000-01-05,0.607482,-0.243301,-0.557622,1.06171


## Stacking and unstacking data frames

In [27]:
# create some sample data: https://pandas.pydata.org/pandas-docs/stable/reshaping.html
outer_labels = ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux']
inner_labels = ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']
# Pair the labels up row by row: ('bar', 'one'), ('bar', 'two'), ...
tuples = list(zip(outer_labels, inner_labels))
index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
# 8 rows x 2 columns of random normal values, indexed by (first, second).
df = pd.DataFrame(
    np.random.randn(8, 2),
    index=index,
    columns=['A', 'B'],
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.467123,-0.664689
bar,two,-0.225881,-0.284552
baz,one,0.789078,1.738412
baz,two,0.659419,-0.088753
foo,one,-0.948546,0.818862
foo,two,-1.415664,-0.358337
qux,one,0.330938,-0.276763
qux,two,-0.001601,-0.013525


In [28]:
# Move the 'second' index level into the columns.
df_unstack = df.unstack(level='second')
df_unstack

Unnamed: 0_level_0,A,A,B,B
second,one,two,one,two
first,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
bar,-0.467123,-0.225881,-0.664689,-0.284552
baz,0.789078,0.659419,1.738412,-0.088753
foo,-0.948546,-1.415664,0.818862,-0.358337
qux,0.330938,-0.001601,-0.276763,-0.013525


In [29]:
# Move the 'second' column level back into the row index.
df_stack_it_back = df_unstack.stack(level='second')
df_stack_it_back

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,-0.467123,-0.664689
bar,two,-0.225881,-0.284552
baz,one,0.789078,1.738412
baz,two,0.659419,-0.088753
foo,one,-0.948546,0.818862
foo,two,-1.415664,-0.358337
qux,one,0.330938,-0.276763
qux,two,-0.001601,-0.013525


## Melting DataFrames