## Working with Tables II: Merging

In [4]:
import pandas as pd

Create two example tables that share a key column `X`:

In [5]:
# First example table: numeric columns A and B plus the key column X.
t1 = pd.DataFrame(
    {
        'A': [1, 2, 3],
        'B': [4, 5, 6],
        'X': ['a', 'b', 'c'],
    }
)
t1

Unnamed: 0,A,B,X
0,1,4,a
1,2,5,b
2,3,6,c


In [6]:
# Second example table: numeric columns C and D plus the key column X.
# Its X values overlap with t1 only on 'b' and 'c'.
t2 = pd.DataFrame(
    {
        'C': [11, 22, 33],
        'D': [44, 55, 66],
        'X': ['b', 'c', 'd'],
    }
)
t2

Unnamed: 0,C,D,X
0,11,44,b
1,22,55,c
2,33,66,d


Merge vertically:

In [7]:
# Stack t2 below t1, keeping each table's original row labels.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported way to append rows.
pd.concat([t1, t2])

Unnamed: 0,A,B,X,C,D
0,1.0,4.0,a,,
1,2.0,5.0,b,,
2,3.0,6.0,c,,
0,,,b,11.0,44.0
1,,,c,22.0,55.0
2,,,d,33.0,66.0


In [8]:
# Stack t2 below t1 and renumber the rows 0..n-1 (ignore old indexes).
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported way to append rows.
pd.concat([t1, t2], axis=0, ignore_index=True)

Unnamed: 0,A,B,X,C,D
0,1.0,4.0,a,,
1,2.0,5.0,b,,
2,3.0,6.0,c,,
3,,,b,11.0,44.0
4,,,c,22.0,55.0
5,,,d,33.0,66.0


Or with concat:

In [9]:
# Row-wise concatenation of both tables; the result gets a fresh 0..n-1 index.
pd.concat(objs=[t1, t2], ignore_index=True)

Unnamed: 0,A,B,X,C,D
0,1.0,4.0,a,,
1,2.0,5.0,b,,
2,3.0,6.0,c,,
3,,,b,11.0,44.0
4,,,c,22.0,55.0
5,,,d,33.0,66.0


Merge horizontally:

In [10]:
# Merge on the shared column X; only X values present in both tables survive.
t1.merge(right=t2, on='X')

Unnamed: 0,A,B,X,C,D
0,2,5,b,11,44
1,3,6,c,22,55


In [11]:
# Same merge with the join type spelled out: 'inner' keeps only matching keys.
t1.merge(right=t2, how='inner', on='X')

Unnamed: 0,A,B,X,C,D
0,2,5,b,11,44
1,3,6,c,22,55


Keep all rows:

In [12]:
# Outer merge: keep every X value from either table; cells with no match are empty.
t1.merge(right=t2, how='outer', on='X')

Unnamed: 0,A,B,X,C,D
0,1.0,4.0,a,,
1,2.0,5.0,b,11.0,44.0
2,3.0,6.0,c,22.0,55.0
3,,,d,33.0,66.0


Keep rows of left table:

In [13]:
# Left merge: keep every row of t1 and attach C/D from t2 where X matches.
t1.merge(right=t2, how='left', on='X')

Unnamed: 0,A,B,X,C,D
0,1,4,a,,
1,2,5,b,11.0,44.0
2,3,6,c,22.0,55.0


In [14]:
# Right merge: keep every row of t2 (the right table) and attach A/B from t1 where X matches.
t1.merge(right=t2, how='right', on='X')

Unnamed: 0,A,B,X,C,D
0,2.0,5.0,b,11,44
1,3.0,6.0,c,22,55
2,,,d,33,66


Create another table:

In [15]:
# Third example table: same column names C and D as t2, but the key column is called Y.
t3 = pd.DataFrame(
    {
        'C': [111, 222, 333],
        'D': [444, 555, 666],
        'Y': ['c', 'd', 'e'],
    }
)
t3

Unnamed: 0,C,D,Y
0,111,444,c
1,222,555,d
2,333,666,e


Merge t1 and t3 on differently named key columns (`X` in t1, `Y` in t3); use `how='outer'` to keep all rows:

In [16]:
# The key columns have different names, so name each side explicitly.
# Inner merge is the default: only 'c' appears in both X and Y, so the result has 1 row.
t1.merge(right=t3, left_on='X', right_on='Y')

Unnamed: 0,A,B,X,C,D,Y
0,3,6,c,111,444,c


In [17]:
# Outer merge across differently named keys: all rows of both tables are kept,
# and both key columns (X and Y) remain in the result.
t1.merge(right=t3, how='outer', left_on='X', right_on='Y')

Unnamed: 0,A,B,X,C,D,Y
0,1.0,4.0,a,,,
1,2.0,5.0,b,,,
2,3.0,6.0,c,111.0,444.0,c
3,,,,222.0,555.0,d
4,,,,333.0,666.0,e


Create 2 more tables:

In [18]:
# Two tables whose keys live in the index (row labels) instead of a column.
t4 = pd.DataFrame(
    {'A': [1, 2, 3], 'B': [4, 5, 6]},
    index=['a', 'b', 'c'],
)
t5 = pd.DataFrame(
    {'C': [11, 22, 33], 'D': [44, 55, 66]},
    index=['b', 'c', 'd'],
)

In [19]:
# Display t4; its row labels are 'a', 'b', 'c'.
t4

Unnamed: 0,A,B
a,1,4
b,2,5
c,3,6


In [20]:
# Display t5; its row labels are 'b', 'c', 'd'.
t5

Unnamed: 0,C,D
b,11,44
c,22,55
d,33,66


Merge t4 and t5 on their indexes with `join`:

In [21]:
# join aligns the two tables on their index; 'left' is the default and keeps all rows of t4.
t4.join(other=t5, how='left')

Unnamed: 0,A,B,C,D
a,1,4,,
b,2,5,11.0,44.0
c,3,6,22.0,55.0


In [22]:
# Outer join: keep every index label found in either t4 or t5.
t4.join(other=t5, how='outer')

Unnamed: 0,A,B,C,D
a,1.0,4.0,,
b,2.0,5.0,11.0,44.0
c,3.0,6.0,22.0,55.0
d,,,33.0,66.0


In [23]:
# Inner join: keep only the index labels present in both tables ('b' and 'c').
t4.join(other=t5, how='inner')

Unnamed: 0,A,B,C,D
b,2,5,11,44
c,3,6,22,55


In [24]:
# Right join: keep every index label of t5 and attach A/B from t4 where available.
t4.join(other=t5, how='right')

Unnamed: 0,A,B,C,D
b,2.0,5.0,11,44
c,3.0,6.0,22,55
d,,,33,66
