# Fusión (merging/join) de datasets por claves

* *60:00 min* | Última modificación: Agosto 11, 2021 | YouTube

In [1]:
import pandas as pd

## Parámetros `left_on` y `right_on`

In [2]:
%%writefile /tmp/clients.csv
clientId,name,location
10,Omar Y. Fletcher,6833 Mollis. Rd.
11,Buffy W. Vincent,"P.O. Box 345, 8390 Ante Avenue"
12,Mira N. Franklin,"P.O. Box 445, 323 Cursus Rd."
13,Ferris Q. Le,Ap #791-3809 Eu Street
14,Michael I. Gray,6715 Diam. Rd.
15,Alan T. Mullins,512-3640 Nisl Rd.
16,Morgan W. Zamora,416-6030 Vivamus Road
17,Lilah O. Morrison,3859 Mauris Ave
18,Chantale Z. Kelley,3433 Arcu. Rd.
19,Randall Q. Mcclure,Ap #584-7470 Nibh. Ave
20,Baker C. Hurst,249-6250 Velit. Rd.
21,Bevis W. Molina,"P.O. Box 935, 1521 At, St."
22,Martina B. Schroeder,689-7600 Mi St.
23,Kylie G. Bailey,1430 Diam. Road
24,Steel K. Glover,"757-681 Et, Av."
25,Lucas M. Suarez,699-9329 Magna Rd.
26,Anastasia Q. Mccray,"P.O. Box 780, 4487 Lobortis, St."
27,Germaine Q. Henson,589-4921 Duis Ave
28,Wilma U. Mcfadden,6917 Dictum Rd.
29,Merritt Q. Martinez,"P.O. Box 469, 7833 Euismod Av."

Writing /tmp/clients.csv


In [6]:
%%writefile /tmp/bonus.csv
Client-Id,bonus
10,279
13,160
14,267
27,215
16,169
17,263
18,233
19,292
20,208
21,247
22,240
23,161
28,463
29,790
11,138
12,227
15,231
24,248
25,216
26,291

Overwriting /tmp/bonus.csv


In [7]:
# Load the client and bonus tables written by the previous cells.
clients_df = pd.read_csv("/tmp/clients.csv")
bonus_df = pd.read_csv("/tmp/bonus.csv")

#
# The key column is named differently in each table ("clientId" vs
# "Client-Id"), so both key columns appear in the merged result.
#
pd.merge(
    left=clients_df,
    right=bonus_df,
    left_on="clientId",
    right_on="Client-Id",
)

Unnamed: 0,clientId,name,location,Client-Id,bonus
0,10,Omar Y. Fletcher,6833 Mollis. Rd.,10,279
1,11,Buffy W. Vincent,"P.O. Box 345, 8390 Ante Avenue",11,138
2,12,Mira N. Franklin,"P.O. Box 445, 323 Cursus Rd.",12,227
3,13,Ferris Q. Le,Ap #791-3809 Eu Street,13,160
4,14,Michael I. Gray,6715 Diam. Rd.,14,267
5,15,Alan T. Mullins,512-3640 Nisl Rd.,15,231
6,16,Morgan W. Zamora,416-6030 Vivamus Road,16,169
7,17,Lilah O. Morrison,3859 Mauris Ave,17,263
8,18,Chantale Z. Kelley,3433 Arcu. Rd.,18,233
9,19,Randall Q. Mcclure,Ap #584-7470 Nibh. Ave,19,292


In [9]:
#
# Merge on the differently named keys, then drop the redundant
# right-hand key column so the client id appears only once.
#
pd.merge(
    left=clients_df,
    right=bonus_df,
    left_on="clientId",
    right_on="Client-Id",
).drop(columns="Client-Id")

Unnamed: 0,clientId,name,location,bonus
0,10,Omar Y. Fletcher,6833 Mollis. Rd.,279
1,11,Buffy W. Vincent,"P.O. Box 345, 8390 Ante Avenue",138
2,12,Mira N. Franklin,"P.O. Box 445, 323 Cursus Rd.",227
3,13,Ferris Q. Le,Ap #791-3809 Eu Street,160
4,14,Michael I. Gray,6715 Diam. Rd.,267
5,15,Alan T. Mullins,512-3640 Nisl Rd.,231
6,16,Morgan W. Zamora,416-6030 Vivamus Road,169
7,17,Lilah O. Morrison,3859 Mauris Ave,263
8,18,Chantale Z. Kelley,3433 Arcu. Rd.,233
9,19,Randall Q. Mcclure,Ap #584-7470 Nibh. Ave,292


## Joins usando el parámetro `how`

![joins.png](assets/joins.png)

In [21]:
%%writefile /tmp/clients_a.csv
clientId,name,location,amount
10,Omar Y. Fletcher,6833 Mollis. Rd.,4929
11,Buffy W. Vincent,"P.O. Box 345, 8390 Ante Avenue",7366
12,Mira N. Franklin,"P.O. Box 445, 323 Cursus Rd.",6184
13,Ferris Q. Le,Ap #791-3809 Eu Street,9872
14,Michael I. Gray,6715 Diam. Rd.,1673
15,Alan T. Mullins,512-3640 Nisl Rd.,7651
16,Morgan W. Zamora,416-6030 Vivamus Road,9472

Overwriting /tmp/clients_a.csv


In [22]:
%%writefile /tmp/clients_b.csv
clientId,name,location,bonus
13,Ferris Q. Le,Ap #791-3809 Eu Street,1392
14,Michael I. Gray,6715 Diam. Rd.,9387
15,Alan T. Mullins,512-3640 Nisl Rd.,6351
16,Morgan W. Zamora,416-6030 Vivamus Road,9384
17,Lilah O. Morrison,3859 Mauris Ave,1539
18,Chantale Z. Kelley,3433 Arcu. Rd.,9837
19,Randall Q. Mcclure,Ap #584-7470 Nibh. Ave,9372

Overwriting /tmp/clients_b.csv


In [23]:
# Load the two partially overlapping client tables used by the join examples.
clients_a_df, clients_b_df = (
    pd.read_csv(csv_path)
    for csv_path in ("/tmp/clients_a.csv", "/tmp/clients_b.csv")
)

In [24]:
#
# Inner join: keep only the clients that appear in both tables.
# No key is given, so the columns shared by both tables are used.
#
pd.merge(
    left=clients_a_df,
    right=clients_b_df,
    how="inner",
)

Unnamed: 0,clientId,name,location,amount,bonus
0,13,Ferris Q. Le,Ap #791-3809 Eu Street,9872,1392
1,14,Michael I. Gray,6715 Diam. Rd.,1673,9387
2,15,Alan T. Mullins,512-3640 Nisl Rd.,7651,6351
3,16,Morgan W. Zamora,416-6030 Vivamus Road,9472,9384


In [25]:
#
# Left join: keep every client of the left table; `bonus` is NaN
# for clients that have no match in the right table.
#
pd.merge(
    left=clients_a_df,
    right=clients_b_df,
    how="left",
)

Unnamed: 0,clientId,name,location,amount,bonus
0,10,Omar Y. Fletcher,6833 Mollis. Rd.,4929,
1,11,Buffy W. Vincent,"P.O. Box 345, 8390 Ante Avenue",7366,
2,12,Mira N. Franklin,"P.O. Box 445, 323 Cursus Rd.",6184,
3,13,Ferris Q. Le,Ap #791-3809 Eu Street,9872,1392.0
4,14,Michael I. Gray,6715 Diam. Rd.,1673,9387.0
5,15,Alan T. Mullins,512-3640 Nisl Rd.,7651,6351.0
6,16,Morgan W. Zamora,416-6030 Vivamus Road,9472,9384.0


In [26]:
#
# Right join: keep every client of the right table; `amount` is NaN
# for clients that have no match in the left table.
#
pd.merge(
    left=clients_a_df,
    right=clients_b_df,
    how="right",
)

Unnamed: 0,clientId,name,location,amount,bonus
0,13,Ferris Q. Le,Ap #791-3809 Eu Street,9872.0,1392
1,14,Michael I. Gray,6715 Diam. Rd.,1673.0,9387
2,15,Alan T. Mullins,512-3640 Nisl Rd.,7651.0,6351
3,16,Morgan W. Zamora,416-6030 Vivamus Road,9472.0,9384
4,17,Lilah O. Morrison,3859 Mauris Ave,,1539
5,18,Chantale Z. Kelley,3433 Arcu. Rd.,,9837
6,19,Randall Q. Mcclure,Ap #584-7470 Nibh. Ave,,9372


In [27]:
#
# Outer join: keep the clients of both tables; columns coming from
# the table without a match are filled with NaN.
#
pd.merge(
    left=clients_a_df,
    right=clients_b_df,
    how="outer",
)

Unnamed: 0,clientId,name,location,amount,bonus
0,10,Omar Y. Fletcher,6833 Mollis. Rd.,4929.0,
1,11,Buffy W. Vincent,"P.O. Box 345, 8390 Ante Avenue",7366.0,
2,12,Mira N. Franklin,"P.O. Box 445, 323 Cursus Rd.",6184.0,
3,13,Ferris Q. Le,Ap #791-3809 Eu Street,9872.0,1392.0
4,14,Michael I. Gray,6715 Diam. Rd.,1673.0,9387.0
5,15,Alan T. Mullins,512-3640 Nisl Rd.,7651.0,6351.0
6,16,Morgan W. Zamora,416-6030 Vivamus Road,9472.0,9384.0
7,17,Lilah O. Morrison,3859 Mauris Ave,,1539.0
8,18,Chantale Z. Kelley,3433 Arcu. Rd.,,9837.0
9,19,Randall Q. Mcclure,Ap #584-7470 Nibh. Ave,,9372.0


In [28]:
#
# Outer join with indicator=True: the extra `_merge` column reports
# where each row came from ("left_only", "right_only" or "both").
#
pd.merge(
    left=clients_a_df,
    right=clients_b_df,
    how="outer",
    indicator=True,
)

Unnamed: 0,clientId,name,location,amount,bonus,_merge
0,10,Omar Y. Fletcher,6833 Mollis. Rd.,4929.0,,left_only
1,11,Buffy W. Vincent,"P.O. Box 345, 8390 Ante Avenue",7366.0,,left_only
2,12,Mira N. Franklin,"P.O. Box 445, 323 Cursus Rd.",6184.0,,left_only
3,13,Ferris Q. Le,Ap #791-3809 Eu Street,9872.0,1392.0,both
4,14,Michael I. Gray,6715 Diam. Rd.,1673.0,9387.0,both
5,15,Alan T. Mullins,512-3640 Nisl Rd.,7651.0,6351.0,both
6,16,Morgan W. Zamora,416-6030 Vivamus Road,9472.0,9384.0,both
7,17,Lilah O. Morrison,3859 Mauris Ave,,1539.0,right_only
8,18,Chantale Z. Kelley,3433 Arcu. Rd.,,9837.0,right_only
9,19,Randall Q. Mcclure,Ap #584-7470 Nibh. Ave,,9372.0,right_only
