In [1]:
import seaborn as sns
import pandas as pd
import numpy as np

In [2]:
tips = sns.load_dataset('tips')
tips.head(3)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3


In [3]:
tips_bill = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
tips_tip = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()

del tips_bill['tip']
del tips_tip['total_bill']

In [4]:
tips_bill

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill
sex,smoker,Unnamed: 2_level_1
Male,Yes,1337.07
Male,No,1919.75
Female,Yes,593.27
Female,No,977.68


In [5]:
tips_bill

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill
sex,smoker,Unnamed: 2_level_1
Male,Yes,1337.07
Male,No,1919.75
Female,Yes,593.27
Female,No,977.68


In [6]:
pd.merge?

In [7]:
# we can merge on the indexes
pd.merge(tips_bill, tips_tip, 
         right_index=True, left_index=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,1337.07,183.07
Male,No,1919.75,302.0
Female,Yes,593.27,96.74
Female,No,977.68,149.77


In [8]:
#we can reset indexes and then merge on the columns - perhaps the easiest way
pd.merge(
    tips_bill.reset_index(), 
    tips_tip.reset_index(),
    on=['sex', 'smoker']
)

Unnamed: 0,sex,smoker,total_bill,tip
0,Male,Yes,1337.07,183.07
1,Male,No,1919.75,302.0
2,Female,Yes,593.27,96.74
3,Female,No,977.68,149.77


In [9]:
# it can actually infer the above - but be very careful with this
pd.merge(
    tips_bill.reset_index(), 
    tips_tip.reset_index()
)

Unnamed: 0,sex,smoker,total_bill,tip
0,Male,Yes,1337.07,183.07
1,Male,No,1919.75,302.0
2,Female,Yes,593.27,96.74
3,Female,No,977.68,149.77


In [10]:
# it can merge on partial column and index
pd.merge(
    tips_bill.reset_index(), 
    tips_tip,
    left_on=['sex', 'smoker'],
    right_index=True
)

Unnamed: 0,sex,smoker,total_bill,tip
0,Male,Yes,1337.07,183.07
1,Male,No,1919.75,302.0
2,Female,Yes,593.27,96.74
3,Female,No,977.68,149.77


In [11]:
#it can do interesting combinations
tips_bill_strange = tips_bill.reset_index(level=0)
tips_bill_strange

Unnamed: 0_level_0,sex,total_bill
smoker,Unnamed: 1_level_1,Unnamed: 2_level_1
Yes,Male,1337.07
No,Male,1919.75
Yes,Female,593.27
No,Female,977.68


In [12]:
pd.merge(
    tips_tip.reset_index(), 
    tips_bill_strange,
    on=['sex', 'smoker']
)

Unnamed: 0,sex,smoker,tip,total_bill
0,Male,Yes,183.07,1337.07
1,Male,No,302.0,1919.75
2,Female,Yes,96.74,593.27
3,Female,No,149.77,977.68


In [13]:
pd.merge(
    tips_tip.reset_index(), 
    tips_bill_strange,
    on=['sex', 'smoker']
)

Unnamed: 0,sex,smoker,tip,total_bill
0,Male,Yes,183.07,1337.07
1,Male,No,302.0,1919.75
2,Female,Yes,96.74,593.27
3,Female,No,149.77,977.68


In [14]:
# we can do any SQL-like functionality
pd.merge(
    tips_bill.reset_index(), 
    tips_tip.reset_index().head(2),
    how='left'
)

Unnamed: 0,sex,smoker,total_bill,tip
0,Male,Yes,1337.07,183.07
1,Male,No,1919.75,302.0
2,Female,Yes,593.27,
3,Female,No,977.68,


In [15]:
pd.merge(
    tips_bill.reset_index(), 
    tips_tip.reset_index().head(2),
    how='inner'
)

Unnamed: 0,sex,smoker,total_bill,tip
0,Male,Yes,1337.07,183.07
1,Male,No,1919.75,302.0


In [16]:
# and if you add an indicator...
pd.merge(
    tips_bill.reset_index().tail(3), 
    tips_tip.reset_index().head(3),
    how='outer',
    indicator=True
)

Unnamed: 0,sex,smoker,total_bill,tip,_merge
0,Male,No,1919.75,302.0,both
1,Female,Yes,593.27,96.74,both
2,Female,No,977.68,,left_only
3,Male,Yes,,183.07,right_only


In [17]:
# it can handle columns with the same name
pd.merge(tips_bill, 
         tips_bill, 
         right_index=True, 
         left_index=True,
         suffixes=('_left', '_right')
)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill_left,total_bill_right
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,1337.07,1337.07
Male,No,1919.75,1919.75
Female,Yes,593.27,593.27
Female,No,977.68,977.68


In [18]:
# this adds the dataframes together row wise
pd.concat([tips_bill, tips_bill, tips_tip], sort=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,1337.07,
Male,No,1919.75,
Female,Yes,593.27,
Female,No,977.68,
Male,Yes,1337.07,
Male,No,1919.75,
Female,Yes,593.27,
Female,No,977.68,
Male,Yes,,183.07
Male,No,,302.0


In [19]:
# this does it column wise
pd.concat([tips_bill, tips_tip], axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,1337.07,183.07
Male,No,1919.75,302.0
Female,Yes,593.27,96.74
Female,No,977.68,149.77
