In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Pivoting a single variable

In [3]:
# Load the raw users table (one row per weekday/city pair) and display it.
df = pd.read_csv(
    filepath_or_buffer='data/users.csv',
)
df

Unnamed: 0,weekday,city,visitors,signups
0,Sun,Austin,139,7
1,Sun,Dallas,237,12
2,Mon,Austin,326,3
3,Mon,Dallas,456,5


In [4]:
# Reshape to one row per weekday and one column per city, with visitor counts
# in the cells.
# The result is bound to a new name rather than back onto `df`: overwriting
# `df` made this cell fail on a second run, because the pivoted frame no longer
# has 'weekday', 'city' or 'visitors' columns to pivot on.
visitors_by_weekday = df.pivot(index='weekday', columns='city', values='visitors')
visitors_by_weekday

city,Austin,Dallas
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,326,456
Sun,139,237


# Pivoting all variables

In [5]:
# Reload the raw table from disk so the following pivots start from all columns.
df = pd.read_csv(filepath_or_buffer='data/users.csv')
df

Unnamed: 0,weekday,city,visitors,signups
0,Sun,Austin,139,7
1,Sun,Dallas,237,12
2,Mon,Austin,326,3
3,Mon,Dallas,456,5


In [6]:
# One row per weekday, one column per city; cells hold the signup counts.
df.pivot(
    values='signups',
    columns='city',
    index='weekday',
)

city,Austin,Dallas
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,3,5
Sun,7,12


In [7]:
# No `values` argument: every remaining column is pivoted, so the result has
# two column levels (variable on top, city underneath).
df.pivot(
    columns='city',
    index='weekday',
)

Unnamed: 0_level_0,visitors,visitors,signups,signups
city,Austin,Dallas,Austin,Dallas
weekday,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Mon,326,456,3,5
Sun,139,237,7,12


# Stacking and Unstacking

In [8]:
# Load the raw table, index it by (city, weekday), and sort that index so the
# rows are grouped by city.
df = (
    pd.read_csv('data/users.csv')
    .set_index(['city', 'weekday'])
    .sort_index()
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin,Mon,326,3
Austin,Sun,139,7
Dallas,Mon,456,5
Dallas,Sun,237,12


In [9]:
# Move the 'weekday' index level into the columns, leaving one row per city.
byweekday = df.unstack('weekday')
byweekday

Unnamed: 0_level_0,visitors,visitors,signups,signups
weekday,Mon,Sun,Mon,Sun
city,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Austin,326,139,3,7
Dallas,456,237,5,12


In [10]:
# Reverse the unstack: push 'weekday' from the columns back into the row index.
byweekday.stack('weekday')

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin,Mon,326,3
Austin,Sun,139,7
Dallas,Mon,456,5
Dallas,Sun,237,12


In [11]:
# Move the 'city' index level into the columns, leaving one row per weekday.
bycity = df.unstack('city')
bycity

Unnamed: 0_level_0,visitors,visitors,signups,signups
city,Austin,Dallas,Austin,Dallas
weekday,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Mon,326,456,3,5
Sun,139,237,7,12


In [12]:
# Push 'city' back into the row index; it becomes the inner level,
# so the index order is (weekday, city).
bycity.stack('city')

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
weekday,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Mon,Austin,326,3
Mon,Dallas,456,5
Sun,Austin,139,7
Sun,Dallas,237,12


# Restoring the index order

In [13]:
# Stack 'city' back into the rows and keep the result; its index levels come
# out as (weekday, city).
new_users = bycity.stack(
    level='city',
)
new_users

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
weekday,city,Unnamed: 2_level_1,Unnamed: 3_level_1
Mon,Austin,326,3
Mon,Dallas,456,5
Sun,Austin,139,7
Sun,Dallas,237,12


In [14]:
# Make 'city' the outer index level and 'weekday' the inner one.
# The target order is named explicitly instead of swapping positions 0 and 1:
# a positional swap flips the levels back every time the cell is re-run,
# whereas reordering by name always ends at (city, weekday).
# Row order is unchanged; only the level order changes.
new_users = new_users.reorder_levels(['city', 'weekday'])
new_users

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin,Mon,326,3
Dallas,Mon,456,5
Austin,Sun,139,7
Dallas,Sun,237,12


In [15]:
# Sort the row index so the rows are grouped by city, then by weekday.
new_users.sort_index(axis=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin,Mon,326,3
Austin,Sun,139,7
Dallas,Mon,456,5
Dallas,Sun,237,12


# Adding names for readability

In [29]:
# Load the raw table and discard 'signups', keeping only the visitor counts.
df = pd.read_csv('data/users.csv').drop(columns=['signups'])
df

Unnamed: 0,weekday,city,visitors
0,Sun,Austin,139
1,Sun,Dallas,237
2,Mon,Austin,326
3,Mon,Dallas,456


In [31]:
# Wide layout: one row per weekday, one column per city, visitor counts inside.
# NOTE: this rebinds `df`, so the cell cannot be re-run on its own; re-run the
# cell that loads the CSV first.
df = df.pivot(
    values='visitors',
    columns='city',
    index='weekday',
)
df

city,Austin,Dallas
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,326,456
Sun,139,237


In [33]:
# Turn the 'weekday' index back into an ordinary column so it can be used as
# an identifier variable when melting.
visitor_by_city_weekday = df.reset_index(drop=False)
visitor_by_city_weekday

city,weekday,Austin,Dallas
0,Mon,326,456
1,Sun,139,237


In [43]:
# Melt with no id_vars: every column, 'weekday' included, is folded into rows.
visitors = visitor_by_city_weekday.melt()
visitors

Unnamed: 0,city,value
0,weekday,Mon
1,weekday,Sun
2,Austin,326
3,Austin,139
4,Dallas,456
5,Dallas,237


In [44]:
# Keep 'weekday' as an identifier; only the city columns are melted into rows.
visitors = visitor_by_city_weekday.melt(id_vars=['weekday'])
visitors

Unnamed: 0,weekday,city,value
0,Mon,Austin,326
1,Sun,Austin,139
2,Mon,Dallas,456
3,Sun,Dallas,237


In [45]:
# Same melt, but call the value column 'visitors' instead of the generic 'value'.
visitors = visitor_by_city_weekday.melt(
    id_vars=['weekday'],
    value_name='visitors',
)
visitors

Unnamed: 0,weekday,city,visitors
0,Mon,Austin,326
1,Sun,Austin,139
2,Mon,Dallas,456
3,Sun,Dallas,237


# Going from wide to long

In [46]:
# Reload the raw table, which has 'visitors' and 'signups' as separate columns.
df = pd.read_csv(
    filepath_or_buffer='data/users.csv',
)
df

Unnamed: 0,weekday,city,visitors,signups
0,Sun,Austin,139,7
1,Sun,Dallas,237,12
2,Mon,Austin,326,3
3,Mon,Dallas,456,5


In [48]:
# Melt every column into (variable, value) pairs, identifiers included.
df.melt()

Unnamed: 0,variable,value
0,weekday,Sun
1,weekday,Sun
2,weekday,Mon
3,weekday,Mon
4,city,Austin
5,city,Dallas
6,city,Austin
7,city,Dallas
8,visitors,139
9,visitors,237


In [53]:
# Keep 'city' and 'weekday' as identifiers and fold 'visitors' and 'signups'
# into a single variable/value column pair.
df.melt(id_vars=['city', 'weekday'])

Unnamed: 0,city,weekday,variable,value
0,Austin,Sun,visitors,139
1,Dallas,Sun,visitors,237
2,Austin,Mon,visitors,326
3,Dallas,Mon,visitors,456
4,Austin,Sun,signups,7
5,Dallas,Sun,signups,12
6,Austin,Mon,signups,3
7,Dallas,Mon,signups,5


# Obtaining key-value pairs with melt()

In [55]:
# Load the raw table and move 'city' and 'weekday' into a two-level row index
# (left unsorted, so rows stay in file order).
df = pd.read_csv('data/users.csv').set_index(['city', 'weekday'])
df

Unnamed: 0_level_0,Unnamed: 1_level_0,visitors,signups
city,weekday,Unnamed: 2_level_1,Unnamed: 3_level_1
Austin,Sun,139,7
Dallas,Sun,237,12
Austin,Mon,326,3
Dallas,Mon,456,5


In [59]:
# Melt the data columns into variable/value pairs. The (city, weekday) row
# index is not carried into the result.
df.melt(col_level=0)

Unnamed: 0,variable,value
0,visitors,139
1,visitors,237
2,visitors,326
3,visitors,456
4,signups,7
5,signups,12
6,signups,3
7,signups,5


# Setting up a pivot table

In [61]:
# Reload the raw table as the input for the pivot-table examples.
df = pd.read_csv(filepath_or_buffer='data/users.csv')
df

Unnamed: 0,weekday,city,visitors,signups
0,Sun,Austin,139,7
1,Sun,Dallas,237,12
2,Mon,Austin,326,3
3,Mon,Dallas,456,5


In [63]:
# Pivot table with one row per weekday and one column per (variable, city).
# 'mean' is pivot_table's default aggregation, spelled out here for clarity.
by_city_day = pd.pivot_table(
    df,
    index='weekday',
    columns='city',
    aggfunc='mean',
)
by_city_day

Unnamed: 0_level_0,visitors,visitors,signups,signups
city,Austin,Dallas,Austin,Dallas
weekday,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Mon,326,456,3,5
Sun,139,237,7,12


# Using other aggregations in pivot tables

In [65]:
# Count the entries in each remaining column for every weekday.
pd.pivot_table(df, index='weekday', aggfunc='count')

Unnamed: 0_level_0,city,signups,visitors
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mon,2,2,2
Sun,2,2,2


In [66]:
# Same per-weekday tally, this time using the builtin `len` as the aggregator.
pd.pivot_table(df, aggfunc=len, index='weekday')

Unnamed: 0_level_0,city,signups,visitors
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mon,2,2,2
Sun,2,2,2


# Using margins in pivot tables

In [67]:
# Total signups and visitors per weekday.
# - aggfunc is the string 'sum' rather than the builtin `sum`: recent pandas
#   releases emit a FutureWarning when the builtin callable is passed.
# - `values` lists the numeric columns explicitly so the text column 'city' is
#   never aggregated (newer pandas no longer drops such columns silently).
df.pivot_table(index='weekday', values=['signups', 'visitors'], aggfunc='sum')

Unnamed: 0_level_0,signups,visitors
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,8,782
Sun,19,376


In [68]:
# Total signups and visitors per weekday, plus an 'All' row of grand totals
# (margins=True).
# - aggfunc is the string 'sum' rather than the builtin `sum`: recent pandas
#   releases emit a FutureWarning when the builtin callable is passed.
# - `values` lists the numeric columns explicitly so the text column 'city' is
#   never aggregated (newer pandas no longer drops such columns silently).
df.pivot_table(
    index='weekday',
    values=['signups', 'visitors'],
    aggfunc='sum',
    margins=True,
)

Unnamed: 0_level_0,signups,visitors
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1
Mon,8.0,782.0
Sun,19.0,376.0
All,27.0,1158.0
