# Desambiguación y actualización

* *60:00 min* | Última modificación: Agosto 11, 2021 | YouTube

In [1]:
import pandas as pd

## Sufijos

In [3]:
%%writefile /tmp/data_1.csv
clientId,info
10,Omar Y. Fletcher
11,Buffy W. Vincent
12,Mira N. Franklin
13,Ferris Q. Le
14,Michael I. Gray
15,Alan T. Mullins

Writing /tmp/data_1.csv


In [4]:
%%writefile /tmp/data_2.csv
clientId,info
10,6833 Mollis. Rd.
11,"P.O. Box 345, 8390 Ante Avenue"
12,"P.O. Box 445, 323 Cursus Rd."
13,Ap #791-3809 Eu Street
14,6715 Diam. Rd.
15,512-3640 Nisl Rd.

Writing /tmp/data_2.csv


In [5]:
#
# Load the two CSV files written above. Both share the
# "clientId" key and both have a column called "info".
#
df_1, df_2 = (
    pd.read_csv(csv_path)
    for csv_path in ("/tmp/data_1.csv", "/tmp/data_2.csv")
)
display(df_1, df_2)

Unnamed: 0,clientId,info
0,10,Omar Y. Fletcher
1,11,Buffy W. Vincent
2,12,Mira N. Franklin
3,13,Ferris Q. Le
4,14,Michael I. Gray
5,15,Alan T. Mullins


Unnamed: 0,clientId,info
0,10,6833 Mollis. Rd.
1,11,"P.O. Box 345, 8390 Ante Avenue"
2,12,"P.O. Box 445, 323 Cursus Rd."
3,13,Ap #791-3809 Eu Street
4,14,6715 Diam. Rd.
5,15,512-3640 Nisl Rd.


In [7]:
#
# Both frames have a non-key column named "info". To tell
# them apart, merge appends its default suffixes, giving
# "info_x" (left frame) and "info_y" (right frame).
#
pd.merge(left=df_1, right=df_2, on="clientId")

Unnamed: 0,clientId,info_x,info_y
0,10,Omar Y. Fletcher,6833 Mollis. Rd.
1,11,Buffy W. Vincent,"P.O. Box 345, 8390 Ante Avenue"
2,12,Mira N. Franklin,"P.O. Box 445, 323 Cursus Rd."
3,13,Ferris Q. Le,Ap #791-3809 Eu Street
4,14,Michael I. Gray,6715 Diam. Rd.
5,15,Alan T. Mullins,512-3640 Nisl Rd.


In [8]:
#
# Custom suffixes: the overlapping "info" columns are named
# "info_LD" (left frame) and "info_RD" (right frame).
#
pd.merge(left=df_1, right=df_2, on="clientId", suffixes=("_LD", "_RD"))

Unnamed: 0,clientId,info_LD,info_RD
0,10,Omar Y. Fletcher,6833 Mollis. Rd.
1,11,Buffy W. Vincent,"P.O. Box 345, 8390 Ante Avenue"
2,12,Mira N. Franklin,"P.O. Box 445, 323 Cursus Rd."
3,13,Ferris Q. Le,Ap #791-3809 Eu Street
4,14,Michael I. Gray,6715 Diam. Rd.
5,15,Alan T. Mullins,512-3640 Nisl Rd.


In [10]:
#
# Alternative: keep the default suffixes and rename the
# resulting columns to meaningful names afterwards.
#
pd.merge(left=df_1, right=df_2, on="clientId").rename(
    columns=dict(info_x="name", info_y="location")
)

Unnamed: 0,clientId,name,location
0,10,Omar Y. Fletcher,6833 Mollis. Rd.
1,11,Buffy W. Vincent,"P.O. Box 345, 8390 Ante Avenue"
2,12,Mira N. Franklin,"P.O. Box 445, 323 Cursus Rd."
3,13,Ferris Q. Le,Ap #791-3809 Eu Street
4,14,Michael I. Gray,6715 Diam. Rd.
5,15,Alan T. Mullins,512-3640 Nisl Rd.


## Update

In [41]:
%%writefile /tmp/data_1.csv
clientId,name,bonus,costs
10,Omar Y. Fletcher,0,9999
11,Buffy W. Vincent,1,
12,Mira N. Franklin,2,
13,Ferris Q. Le,3,
14,Michael I. Gray,4,
15,Alan T. Mullins,5,

Overwriting /tmp/data_1.csv


In [42]:
%%writefile /tmp/data_2.csv
clientId,location,bonus,costs
12,"P.O. Box 445, 323 Cursus Rd.",,12
13,Ap #791-3809 Eu Street,,13
14,6715 Diam. Rd.,,14
15,512-3640 Nisl Rd.,9999,15
16,416-6030 Vivamus Road,,16
17,3859 Mauris Ave,,17
18,3433 Arcu. Rd.,,

Overwriting /tmp/data_2.csv


In [43]:
#
# Load the rewritten CSV files. df_2 is given the index
# labels 2..8 so that only labels 2..5 overlap with the
# default 0..5 index of df_1.
#
df_1, df_2 = (
    pd.read_csv(csv_path)
    for csv_path in ("/tmp/data_1.csv", "/tmp/data_2.csv")
)
df_2.index = range(2, 9)
display(df_1, df_2)

Unnamed: 0,clientId,name,bonus,costs
0,10,Omar Y. Fletcher,0,9999.0
1,11,Buffy W. Vincent,1,
2,12,Mira N. Franklin,2,
3,13,Ferris Q. Le,3,
4,14,Michael I. Gray,4,
5,15,Alan T. Mullins,5,


Unnamed: 0,clientId,location,bonus,costs
2,12,"P.O. Box 445, 323 Cursus Rd.",,12.0
3,13,Ap #791-3809 Eu Street,,13.0
4,14,6715 Diam. Rd.,,14.0
5,15,512-3640 Nisl Rd.,9999.0,15.0
6,16,416-6030 Vivamus Road,,16.0
7,17,3859 Mauris Ave,,17.0
8,18,3433 Arcu. Rd.,,


In [44]:
#
# Update df_1 in place with the non-NA values of df_2.
#
# Rows are matched on their index labels, so only labels
# 2..5 (present in both frames) are affected. In the output
# below, costs on those rows is filled with 12..15 and
# bonus of the last record becomes 9999.
#
# Cells that are NaN in df_2 leave df_1 untouched, and rows
# 0..1 have no match in df_2: costs of the first client
# keeps its original 9999 and the second one stays NaN.
# The "location" column of df_2 is not added to df_1.
#
df_1.update(df_2)
df_1

Unnamed: 0,clientId,name,bonus,costs
0,10.0,Omar Y. Fletcher,0.0,9999.0
1,11.0,Buffy W. Vincent,1.0,
2,12.0,Mira N. Franklin,2.0,12.0
3,13.0,Ferris Q. Le,3.0,13.0
4,14.0,Michael I. Gray,4.0,14.0
5,15.0,Alan T. Mullins,9999.0,15.0


In [45]:
#
# Reverse direction: reload both frames and update df_2 in
# place with the non-NA values of df_1.
#
# In the output below, bonus on index labels 2..5 takes the
# values from df_1 (the 9999 on label 5 becomes 5). costs
# is unchanged because df_1 has no costs value on those rows.
#
df_1, df_2 = (
    pd.read_csv(csv_path)
    for csv_path in ("/tmp/data_1.csv", "/tmp/data_2.csv")
)
df_2.index = range(2, 9)
df_2.update(df_1)
df_2

Unnamed: 0,clientId,location,bonus,costs
2,12.0,"P.O. Box 445, 323 Cursus Rd.",2.0,12.0
3,13.0,Ap #791-3809 Eu Street,3.0,13.0
4,14.0,6715 Diam. Rd.,4.0,14.0
5,15.0,512-3640 Nisl Rd.,5.0,15.0
6,16.0,416-6030 Vivamus Road,,16.0
7,17.0,3859 Mauris Ave,,17.0
8,18.0,3433 Arcu. Rd.,,


## combine_first

In [46]:
#
# Unlike update, combine_first does not modify df_1 in
# place: it returns a new DataFrame. The return value is
# discarded here, so the df_1 displayed below is unchanged.
#
df_1 = pd.read_csv("/tmp/data_1.csv")
df_2 = pd.read_csv("/tmp/data_2.csv")
df_2.index = range(2,9)
df_1.combine_first(df_2)
df_1

Unnamed: 0,clientId,name,bonus,costs
0,10,Omar Y. Fletcher,0,9999.0
1,11,Buffy W. Vincent,1,
2,12,Mira N. Franklin,2,
3,13,Ferris Q. Le,3,
4,14,Michael I. Gray,4,
5,15,Alan T. Mullins,5,


In [47]:
#
# Unlike update, combine_first does not modify df_2 in
# place: it returns a new DataFrame. The return value is
# discarded here, so the df_2 displayed below is unchanged.
#
df_1 = pd.read_csv("/tmp/data_1.csv")
df_2 = pd.read_csv("/tmp/data_2.csv")
df_2.index = range(2,9)
df_2.combine_first(df_1)
df_2

Unnamed: 0,clientId,location,bonus,costs
2,12,"P.O. Box 445, 323 Cursus Rd.",,12.0
3,13,Ap #791-3809 Eu Street,,13.0
4,14,6715 Diam. Rd.,,14.0
5,15,512-3640 Nisl Rd.,9999.0,15.0
6,16,416-6030 Vivamus Road,,16.0
7,17,3859 Mauris Ave,,17.0
8,18,3433 Arcu. Rd.,,
