In [2]:
%matplotlib inline
import numpy as np
import numpy.ma as ma
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
import timeit
import line_profiler

import datetime

from io import StringIO
from pandas.api.types import CategoricalDtype

import pandas._testing as tm

In [22]:
def unpivot(frame):
    """Reshape a wide DataFrame into long ("stacked") format.

    Parameters
    ----------
    frame : pandas.DataFrame
        Wide table. Its index values end up in the ``date`` column and its
        column labels in the ``variable`` column.

    Returns
    -------
    pandas.DataFrame
        One row per (column, index) pair of ``frame`` with the columns
        ``date``, ``variable`` and ``value``, laid out column by column.
    """
    N, K = frame.shape
    data = {
        # Column-major ("F") flattening walks down each column in turn, which
        # lines up with the repeat/tile layout of the two label arrays below.
        "value": frame.to_numpy().ravel("F"),
        "variable": np.asarray(frame.columns).repeat(N),
        "date": np.tile(np.asarray(frame.index), K),
    }
    return pd.DataFrame(data, columns=["date", "variable", "value"])


# Build the 3x4 sample frame explicitly (business-day index, columns A-D)
# rather than through the private ``tm.makeTimeDataFrame`` helper, which
# current pandas releases no longer provide. The generator is seeded so the
# cell gives the same numbers on every run.
df = unpivot(
    pd.DataFrame(
        np.random.default_rng(0).standard_normal((3, 4)),
        index=pd.bdate_range("2000-01-03", periods=3),
        columns=list("ABCD"),
    )
)
df.head()

Unnamed: 0,date,variable,value
0,2000-01-03,A,-1.129489
1,2000-01-04,A,0.884005
2,2000-01-05,A,0.602088
3,2000-01-03,B,0.132881
4,2000-01-04,B,-1.495405


In [26]:
# Long -> wide: one row per date, one column per variable, cells taken from
# the "value" column.
df.pivot(
    index="date",
    columns="variable",
    values="value",
)

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,-1.129489,0.132881,-0.675755,-0.33038
2000-01-04,0.884005,-1.495405,0.27074,-1.124825
2000-01-05,0.602088,0.875596,-1.324956,-0.294233


In [27]:
# Add a second measure column holding twice the original value.
df["value2"] = 2 * df["value"]
df.head()

Unnamed: 0,date,variable,value,value2
0,2000-01-03,A,-1.129489,-2.258979
1,2000-01-04,A,0.884005,1.768011
2,2000-01-05,A,0.602088,1.204176
3,2000-01-03,B,0.132881,0.265763
4,2000-01-04,B,-1.495405,-2.99081


In [31]:
# No ``values`` argument is passed here; the cell then selects the "value2"
# block of the pivoted result by its top-level column label.
pivoted = df.pivot(
    index="date",
    columns="variable",
)
pivoted["value2"]

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,-2.258979,0.265763,-1.35151,-0.660759
2000-01-04,1.768011,-2.99081,0.54148,-2.249649
2000-01-05,1.204176,1.751193,-2.649912,-0.588467


In [39]:
# Pair the outer and inner labels element-wise: one (first, second) tuple
# per row of the frame built below.
tuples = list(
    zip(
        ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
        ["one", "two", "one", "two", "one", "two", "one", "two"],
    )
)

index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"])
# Keep only the first four rows, i.e. the "bar" and "baz" groups.
df2 = df.iloc[:4]
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.441236,0.887606
bar,two,0.650041,2.448718
baz,one,-0.527134,-0.749858
baz,two,-0.911297,1.28442


In [61]:
# Stack the innermost (here: only) column level into the row index.
stacked = df2.stack(level=-1)
stacked

first  second   
bar    one     A    0.441236
               B    0.887606
       two     A    0.650041
               B    2.448718
baz    one     A   -0.527134
               B   -0.749858
       two     A   -0.911297
               B    1.284420
dtype: float64

In [58]:
%timeit stacked.unstack()

945 µs ± 21.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [60]:
%timeit stacked.reset_index().rename(columns={"level_2":"col1", 0:"col2"}).pivot_table(index=['first', 'second'], columns='col1', values='col2')

10.3 ms ± 102 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [70]:
# Frame whose outer index level is deliberately unsorted (2 before 1).
index = pd.MultiIndex.from_product([[2, 1], ["a", "b"]])
df = pd.DataFrame(np.random.randn(4), index=index, columns=["A"])
print(df)

# Check that unstack -> stack reproduces the frame in sorted index order.
# The builtin ``all(frame)`` iterates over the column labels rather than the
# cell values, so it would report True for the truthy label "A" whatever the
# data; reduce the element-wise comparison over every cell instead.
roundtrip_ok = bool((df.unstack().stack() == df.sort_index()).all(axis=None))
roundtrip_ok

            A
2 a  0.446744
  b -1.484540
1 a  0.808466
  b -0.627887


True

In [75]:
# Three-level column index: exp x animal x hair_length.
columns = pd.MultiIndex.from_tuples(
    [
        ("A", "cat", "long"),
        ("B", "cat", "long"),
        ("A", "dog", "short"),
        ("B", "dog", "short"),
    ],
    names=["exp", "animal", "hair_length"],
)
df = pd.DataFrame(np.random.randn(4, 4), columns=columns)

# The levels to stack are given once by name and once by position. Only the
# last expression of a cell is displayed, so the by-name result is computed
# and then discarded.
df.stack(level=["animal", "hair_length"])
df.stack(level=[1, 2])

Unnamed: 0_level_0,Unnamed: 1_level_0,exp,A,B
Unnamed: 0_level_1,animal,hair_length,Unnamed: 3_level_1,Unnamed: 4_level_1
0,cat,long,-0.345836,-0.01742
0,dog,short,-1.825979,0.887327
1,cat,long,1.480182,-0.10171
1,dog,short,-2.072065,-2.310006
2,cat,long,-0.338607,0.752293
2,dog,short,1.229075,-1.032769
3,cat,long,0.739229,0.216387
3,dog,short,1.005689,1.015874


In [88]:
# Small hand-written sample table; each entry below is one column of nine rows.
df = pd.DataFrame(
    {
        "exp": ["1y"] * 9,
        "mat": ["1y"] * 3 + ["2y"] * 3 + ["1y"] * 3,
        "reg": ["in"] * 9,
        "con": ["5w", "5s", "5c", "5w", "5s", "5c", "10w", "10s", "10c"],
        "pcon": ["w", "s", "c"] * 3,
        "val": [2.5, 0, -2.5, 2.5, 0, -2.5, 5, 0, -5],
        "rs": [6, 10, 4, 12, 30, 6, 30, 25, 10],
    }
)

# Disabled experiment, kept for reference:
# df["tem"] = df["con"].str.extract(r"(\d+)")
df

Unnamed: 0,exp,mat,reg,con,pcon,val,rs
0,1y,1y,in,5w,w,2.5,6
1,1y,1y,in,5s,s,0.0,10
2,1y,1y,in,5c,c,-2.5,4
3,1y,2y,in,5w,w,2.5,12
4,1y,2y,in,5s,s,0.0,30
5,1y,2y,in,5c,c,-2.5,6
6,1y,1y,in,10w,w,5.0,30
7,1y,1y,in,10s,s,0.0,25
8,1y,1y,in,10c,c,-5.0,10
