# Pivoting demo

In [1]:
# Import NumPy and Pandas
import numpy as np
import pandas as pd

In [2]:
# Example data: one row per person, described column by column.
people = pd.DataFrame({
    'gender': ['M', 'F', 'M', 'M', 'F', 'F'],
    'year': [1977, 1980, 1980, 1975, 1977, 1975],
    'opinion': ['YES', 'YES', 'NO', 'YES', 'NO', 'YES'],
    'value': [1.5, 4.3, 3.4, 1.1, 0.1, 7],
})
people

Unnamed: 0,gender,year,opinion,value
0,M,1977,YES,1.5
1,F,1980,YES,4.3
2,M,1980,NO,3.4
3,M,1975,YES,1.1
4,F,1977,NO,0.1
5,F,1975,YES,7.0


In [3]:
# Reshape the table: one row per "year" of birth, one column per
# "gender", and each cell filled with the matching "value".
people.pivot(
    values='value',
    index='year',
    columns='gender',
)

gender,F,M
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1975,7.0,1.1
1977,0.1,1.5
1980,4.3,3.4


In [4]:
# The previous pivot succeeded because every (year, gender) pair
# occurs exactly once in the data.

# Pivoting on "opinion" fails: (1975, 'YES') occurs twice, and
# `pivot` cannot place two values in a single cell.
people.pivot(
    values='value',
    index='year',
    columns='opinion',
)

ValueError: Index contains duplicate entries, cannot reshape

In [5]:
# `pivot_table` is a more generic command that allows specifying
# how to combine the values when there are multiple matching ones.

# Same reshaping as the first pivot; no (year, gender) pair repeats,
# so there is nothing to combine.
pd.pivot_table(
    data=people,
    values='value',
    index=['year'],
    columns=['gender'],
)

gender,F,M
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1975,7.0,1.1
1977,0.1,1.5
1980,4.3,3.4


In [6]:
# Pivot around 'opinion'. When several rows land in the same cell,
# the default aggregation is their mean.
pd.pivot_table(
    data=people,
    values='value',
    index=['year'],
    columns=['opinion'],
)

opinion,NO,YES
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1975,,4.05
1977,0.1,1.5
1980,3.4,4.3


In [7]:
# We can specify another function for the aggregation. The string
# name 'max' selects pandas' built-in maximum; recent pandas versions
# emit a FutureWarning when the NumPy callable `np.max` is passed instead.
pd.pivot_table(people, index=['year'], columns=['opinion'],
               values='value', aggfunc='max')

opinion,NO,YES
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1975,,7.0
1977,0.1,1.5
1980,3.4,4.3


In [8]:
# Double pivot!! Listing two keys in `columns` produces two column
# levels: "gender" on top and "opinion" beneath it.
# The aggregation is given by name ('max') rather than as `np.max`,
# which recent pandas versions flag with a FutureWarning.
pd.pivot_table(people, index=['year'], columns=['gender', 'opinion'],
               values='value', aggfunc='max')

gender,F,F,M,M
opinion,NO,YES,NO,YES
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
1975,,7.0,,1.1
1977,0.1,,,1.5
1980,,4.3,3.4,
