# Unión y concatenación de DataFrames

## Unión de DataFrames usando el método .merge()

In [3]:
import numpy as np
import pandas as pd

# Load 'sp500.csv' with its 'Date' column as the index, parsed as dates: sp500
sp500 = pd.read_csv(
    'data/sp500.csv',
    index_col='Date',
    parse_dates=True,
)

# Load 'exchange.csv' the same way, so both frames are indexed by 'Date': exchange
exchange = pd.read_csv(
    'data/exchange.csv',
    index_col='Date',
    parse_dates=True,
)

Al unir dos DataFrames se puede especificar, con el parámetro **on**, la columna (o el nivel del índice) que los relaciona.

In [None]:
# Join sp500 and exchange on 'Date' (the index both frames were loaded with),
# using the default inner join: merge_by_date
merge_by_date = sp500.merge(exchange, on='Date')

# Show the first rows of merge_by_date
merge_by_date.head()

## Unión entre columnas de diferente nombre con `left_on` y `right_on`

In [None]:
# Load 'players.csv', using its 'Index' column as the row index: players
players = pd.read_csv(
    'data/nba/players.csv',
    index_col='Index',
)

# Load 'player_data.csv', keeping the default integer index: player_data
player_data = pd.read_csv('data/nba/player_data.csv')

# Show the first rows of players
players.head()

In [None]:
# Show the first five rows of player_data (five is the default for head())
player_data.head(n=5)

Se usa **left_on** para indicar la columna del DataFrame de la izquierda y **right_on** para la del DataFrame de la derecha.

In [None]:
# Merge players with player_data, matching the 'Player' column of players
# against the 'name' column of player_data: merge_by_name
merge_by_name = players.merge(
    player_data,
    left_on='Player',
    right_on='name',
)

# Show the first rows of merge_by_name
merge_by_name.head()

Con el parámetro **suffixes** se pueden definir los sufijos para diferenciar las columnas con igual nombre.

In [None]:
# Merge players with player_data ('Player' on the left, 'name' on the right).
# Non-key columns that share a name get '_l' (left) / '_r' (right) appended:
# merge_by_name_suffix
merge_by_name_suffix = players.merge(
    player_data,
    left_on='Player',
    right_on='name',
    suffixes=('_l', '_r'),
)

# Show the first rows of merge_by_name_suffix
merge_by_name_suffix.head()

Para hacer merge usando varias columnas se debe definir una lista para el parámetro **on**.

In [None]:
# Relabel the players columns so that 'name' and 'college' exist in both frames.
# NOTE: this modifies players in place, so cells that still refer to the
# 'Player' column will no longer work after this cell has run.
players.columns = [
    'name', 'height', 'weight', 'college',
    'born', 'birth_city', 'birth_state',
]

# Merge players with player_data on both 'name' and 'college'; other columns
# that share a name get '_left' / '_right' appended.
# This rebinds merge_by_name_suffix, replacing any earlier value.
merge_by_name_suffix = players.merge(
    player_data,
    on=['name', 'college'],
    suffixes=('_left', '_right'),
)

# Show the first rows of merge_by_name_suffix
merge_by_name_suffix.head()