In [2]:
%matplotlib inline
import numpy as np
import numpy.ma as ma
import pandas as pd
from pandas import DataFrame, Series
import matplotlib.pyplot as plt
import seaborn as sns
import timeit
import line_profiler

import datetime

from io import StringIO
from pandas.api.types import CategoricalDtype

import pandas._testing as tm

In [22]:
def unpivot(frame):
    """Reshape a wide DataFrame into long (stacked) format.

    Every cell of ``frame`` becomes one row of the result, described by
    the index label it came from (``date``), the column label it came
    from (``variable``) and the cell content (``value``). Rows are
    emitted column by column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Wide table to reshape.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``date``, ``variable`` and ``value`` holding
        ``frame.shape[0] * frame.shape[1]`` rows.
    """
    n_rows, n_cols = frame.shape
    # Column-major ("F") flattening walks down each column in turn, so the
    # index labels are tiled once per column and every column label is
    # repeated once per row to stay aligned with the flattened values.
    long_columns = {
        "date": np.tile(np.asarray(frame.index), n_cols),
        "variable": np.asarray(frame.columns).repeat(n_rows),
        "value": frame.to_numpy().ravel("F"),
    }
    return pd.DataFrame(long_columns, columns=["date", "variable", "value"])

# Build the small wide time-series frame explicitly rather than through
# ``tm.makeTimeDataFrame``: that helper lives in the private
# ``pandas._testing`` module and is not available in newer pandas releases.
# Three business days starting 2000-01-03, four random-normal columns A-D.
wide = pd.DataFrame(
    np.random.randn(3, 4),
    index=pd.bdate_range("2000-01-03", periods=3),
    columns=list("ABCD"),
)
df = unpivot(wide)
df.head()

Unnamed: 0,date,variable,value
0,2000-01-03,A,-1.129489
1,2000-01-04,A,0.884005
2,2000-01-05,A,0.602088
3,2000-01-03,B,0.132881
4,2000-01-04,B,-1.495405


In [26]:
# Spread the long table back out: one row per date, one column per
# variable, cells filled from the "value" column.
df.pivot(
    index="date",
    columns="variable",
    values="value",
)

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,-1.129489,0.132881,-0.675755,-0.33038
2000-01-04,0.884005,-1.495405,0.27074,-1.124825
2000-01-05,0.602088,0.875596,-1.324956,-0.294233


In [27]:
# Add a second measurement column holding twice the original value.
df["value2"] = df["value"].mul(2)
df.head()

Unnamed: 0,date,variable,value,value2
0,2000-01-03,A,-1.129489,-2.258979
1,2000-01-04,A,0.884005,1.768011
2,2000-01-05,A,0.602088,1.204176
3,2000-01-03,B,0.132881,0.265763
4,2000-01-04,B,-1.495405,-2.99081


In [31]:
# No ``values`` argument is given, so every remaining column ("value" and
# "value2") is pivoted; selecting "value2" afterwards yields the per-variable
# table for that measurement only.
pivoted = df.pivot(
    index="date",
    columns="variable",
)
pivoted["value2"]

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,-2.258979,0.265763,-1.35151,-0.660759
2000-01-04,1.768011,-2.99081,0.54148,-2.249649
2000-01-05,1.204176,1.751193,-2.649912,-0.588467


In [39]:
# Two parallel label lists, paired element-wise into the (first, second)
# tuples that make up the two-level row index.
outer_labels = ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"]
inner_labels = ["one", "two", "one", "two", "one", "two", "one", "two"]
tuples = list(zip(outer_labels, inner_labels))

index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])

df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=["A", "B"])
# Keep only the first four rows, i.e. the "bar" and "baz" groups.
df2 = df.iloc[:4]
df2

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
first,second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.441236,0.887606
bar,two,0.650041,2.448718
baz,one,-0.527134,-0.749858
baz,two,-0.911297,1.28442


In [61]:
# Fold the innermost (here: only) column level into the row index, so the
# "A"/"B" labels become a third index level.
stacked = df2.stack(level=-1)
stacked

first  second   
bar    one     A    0.441236
               B    0.887606
       two     A    0.650041
               B    2.448718
baz    one     A   -0.527134
               B   -0.749858
       two     A   -0.911297
               B    1.284420
dtype: float64

In [58]:
# Baseline timing: undo the stack with the built-in unstack.
%timeit stacked.unstack()

945 µs ± 21.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [60]:
# Timing of the same reshape spelled out by hand for comparison: flatten the
# index into columns, rename the auto-generated "level_2" and 0 columns to
# "col1"/"col2", then pivot_table back onto the ('first', 'second') index.
%timeit stacked.reset_index().rename(columns={"level_2":"col1", 0:"col2"}).pivot_table(index=['first', 'second'], columns='col1', values='col2')

10.3 ms ± 102 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [70]:
# The first index level is deliberately given in unsorted order (2 before 1).
index = pd.MultiIndex.from_product([[2, 1], ["a", "b"]])
df = pd.DataFrame(np.random.randn(4), index=index, columns=["A"])
print(df)
# Check that an unstack/stack round trip reproduces the frame once the
# original is put into sorted index order.
# The element-wise comparison is reduced with DataFrame.all(axis=None):
# the builtin all() would iterate over the column labels of the comparison
# frame (just "A", which is truthy) and report True whatever the values are.
roundtrip_matches = bool(
    (df.unstack().stack() == df.sort_index()).all(axis=None)
)
roundtrip_matches

            A
2 a  0.446744
  b -1.484540
1 a  0.808466
  b -0.627887


True

In [75]:
# Three-level column index: experiment, animal and hair length.
columns = pd.MultiIndex.from_tuples([
    ("A", "cat", "long"), ("B", "cat", "long"),
    ("A", "dog", "short"), ("B", "dog", "short"),
], names=["exp", "animal", "hair_length"],)
df = pd.DataFrame(np.random.randn(4, 4), columns=columns)
# The levels to stack can be given by name or by position. Previously the
# by-name result was computed and silently thrown away (only the last
# expression of a cell is displayed); bind both and verify they agree.
by_name = df.stack(level=["animal", "hair_length"])
by_position = df.stack(level=[1, 2])
assert by_name.equals(by_position), "stacking by name and by position should agree"
by_position

Unnamed: 0_level_0,Unnamed: 1_level_0,exp,A,B
Unnamed: 0_level_1,animal,hair_length,Unnamed: 3_level_1,Unnamed: 4_level_1
0,cat,long,-0.345836,-0.01742
0,dog,short,-1.825979,0.887327
1,cat,long,1.480182,-0.10171
1,dog,short,-2.072065,-2.310006
2,cat,long,-0.338607,0.752293
2,dog,short,1.229075,-1.032769
3,cat,long,0.739229,0.216387
3,dog,short,1.005689,1.015874


In [154]:
# Column index: (experiment, animal) pairs, intentionally not in sorted order.
column_tuples = [("A", "cat"), ("B", "dog"), ("B", "cat"), ("A", "dog")]
columns = pd.MultiIndex.from_tuples(column_tuples, names=["exp", "animals"])

# Row index: every combination of the four outer and two inner labels.
first_labels = ("bar", "baz", "foo", "qux")
second_labels = ("one", "two")
index = pd.MultiIndex.from_product(
    [first_labels, second_labels],
    names=["first", "second"],
)

df = pd.DataFrame(np.random.randn(8, 4), index=index, columns=columns)
# Leave out positions 3 and 6 so that some (first, second) pairs are missing.
kept_rows = [0, 1, 2, 4, 5, 7]
df2 = df.iloc[kept_rows]

In [132]:
# Stack the "exp" column level of the reduced frame (result not displayed:
# only the last expression of a cell is rendered).
df2.stack(level="exp")
# Stack the "animals" column level of the full frame; this is the table shown.
df.stack(level="animals")

Unnamed: 0_level_0,Unnamed: 1_level_0,exp,A,B
first,second,animals,Unnamed: 3_level_1,Unnamed: 4_level_1
bar,one,cat,-1.277764,0.544884
bar,one,dog,-2.006077,0.75543
bar,two,cat,0.680017,0.111496
bar,two,dog,1.079009,0.045822
baz,one,cat,1.357956,-1.623726
baz,one,dog,1.015614,0.59991
baz,two,cat,-0.932898,1.390138
baz,two,dog,0.252586,-0.593359
foo,one,cat,-1.479511,1.091047
foo,one,dog,0.1328,-0.057754


In [136]:
# Select four rows and two columns by position, then move the innermost
# row level into the columns; combinations absent from the selection
# show up as NaN.
row_positions = [0, 1, 4, 7]
col_positions = [1, 2]
df3 = df.iloc[row_positions, col_positions]
df3.unstack(level=-1)

exp,B,B,B,B
animals,dog,dog,cat,cat
second,one,two,one,two
first,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
bar,0.75543,0.045822,0.544884,0.111496
foo,-0.057754,,1.091047,
qux,,-0.494976,,2.318594


In [137]:
# Same unstack as before, but put 0.0 wherever a combination is missing
# instead of leaving NaN.
df3.unstack(level=-1, fill_value=0.0)

exp,B,B,B,B
animals,dog,dog,cat,cat
second,one,two,one,two
first,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
bar,0.75543,0.045822,0.544884,0.111496
foo,-0.057754,0.0,1.091047,0.0
qux,0.0,-0.494976,0.0,2.318594


In [156]:
# Take the first three rows and move the outermost row level (position 0)
# into the columns.
df.iloc[:3].unstack(level=0)

exp,A,A,B,B,B,B,A,A
animals,cat,cat,dog,dog,cat,cat,dog,dog
first,bar,baz,bar,baz,bar,baz,bar,baz
second,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
one,-0.533941,1.510495,0.520594,0.848948,0.490015,-0.437616,2.417119,-1.314637
two,1.405792,,-0.234288,,-0.588648,,-0.650806,


In [158]:
# Take the first three rows and move the row level at position 1 into the
# columns.
df.iloc[:3].unstack(level=1)

exp,A,A,B,B,B,B,A,A
animals,cat,cat,dog,dog,cat,cat,dog,dog
second,one,two,one,two,one,two,one,two
first,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
bar,-0.533941,1.405792,0.520594,-0.234288,0.490015,-0.588648,2.417119,-0.650806
baz,1.510495,,0.848948,,-0.437616,,-1.314637,


In [169]:
# Two-column frame: a numeric identifier column and a string column.
df = DataFrame({"fr": [1.1, 2.9], "var": ["10px", "12pz"]})
# Keep "fr" as the identifier; every other column is melted into a
# (Ramesh, ValueSuresh) name/value pair.
df.melt(
    id_vars=["fr"],
    var_name="Ramesh",
    value_name="ValueSuresh",
)

Unnamed: 0,fr,Ramesh,ValueSuresh
0,1.1,var,10px
1,2.9,var,12pz


In [174]:
# One row per person: two identifier columns and two measurement columns.
cheese = pd.DataFrame(
    dict(
        first=["John", "Mary"],
        last=["Doe", "Bo"],
        height=[5.5, 6.0],
        weight=[130, 150],
    )
)
print(cheese)
# Melt the measurement columns into rows; the column holding the former
# column names is called "quantity".
pd.melt(cheese, id_vars=["first", "last"], var_name="quantity")

  first last  height  weight
0  John  Doe     5.5     130
1  Mary   Bo     6.0     150


Unnamed: 0,first,last,quantity,value
0,John,Doe,height,5.5
1,Mary,Bo,height,6.0
2,John,Doe,weight,130.0
3,Mary,Bo,weight,150.0


In [178]:
# Same table as before, this time carrying a two-level row index.
person_keys = [("person", "A"), ("person", "B")]
index = pd.MultiIndex.from_tuples(person_keys)
cheese = pd.DataFrame(
    dict(
        first=["John", "Mary"],
        last=["Doe", "Bo"],
        height=[5.5, 6.0],
        weight=[130, 150],
    ),
    index=index,
)

# ignore_index=False keeps the original row labels on the melted rows.
pd.melt(cheese, id_vars=["first", "last"], ignore_index=False)

Unnamed: 0,Unnamed: 1,first,last,variable,value
person,A,John,Doe,height,5.5
person,B,Mary,Bo,height,6.0
person,A,John,Doe,weight,130.0
person,B,Mary,Bo,weight,150.0


In [180]:
# Wide table: the "A" and "B" stubs each appear once per year suffix, plus
# a random column "X" that has no year suffix.
wide_columns = {
    "A1970": {0: "a", 1: "b", 2: "c"},
    "A1980": {0: "d", 1: "e", 2: "f"},
    "B1970": {0: 2.5, 1: 1.2, 2: 0.7},
    "B1980": {0: 3.2, 1: 1.3, 2: 0.1},
    "X": dict(enumerate(np.random.randn(3))),
}
dft = pd.DataFrame(wide_columns)

# wide_to_long needs an explicit identifier column; reuse the row labels.
dft["id"] = dft.index
pd.wide_to_long(dft, stubnames=["A", "B"], i="id", j="year")

Unnamed: 0_level_0,Unnamed: 1_level_0,X,A,B
id,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1970,-1.989128,a,2.5
1,1970,0.670074,b,1.2
2,1970,0.524666,c,0.7
0,1980,-1.989128,d,3.2
1,1980,0.670074,e,1.3
2,1980,0.524666,f,0.1
