# Examples in pandas for merge and concat

In [1]:
# Setup
import pandas as pd
df1 = pd.DataFrame([[11,'red'],[12,'blue'],[13,'green']], columns=['id', 'color'])
df2 = pd.DataFrame([[11,'red'],[12,'green'],[14,'yellow']], columns=['id', 'color'])

## Merge

In [2]:
print("df1:")
print(df1)

print("------------")
print("df2:")
print(df2)

print("------------")
print("Merged using left outer join:")
df = pd.merge(df1, df2, on='id', how='left')
print(df)

df1:
   id  color
0  11    red
1  12   blue
2  13  green
------------
df2:
   id   color
0  11     red
1  12   green
2  14  yellow
------------
Merged using left outer join:
   id color_x color_y
0  11     red     red
1  12    blue   green
2  13   green     NaN


In [3]:
print("Merged using full outer join:")
df = pd.merge(df1, df2, on='id', how='outer')
print(df)

Merged using full outer join:
     id color_x color_y
0  11.0     red     red
1  12.0    blue   green
2  13.0   green     NaN
3  14.0     NaN  yellow


In [4]:
print("Merged using inner join:")
df = pd.merge(df1, df2, on='id', how='inner')
print(df)

Merged using inner join:
   id color_x color_y
0  11     red     red
1  12    blue   green


In [5]:
# Left outer join operation using merge
df3 = pd.DataFrame([[11,'A'],[11,'B'],[12,'C'],[12,'B'],[13,'C']], columns=['pid', 'part'])

print("df1:")
print(df1)

print("------------")
print("df3:")
print(df3)

print("------------")
print("Merged using left outer join:")
df = pd.merge(df1, df3, left_on='id', right_on='pid', how='left') # if the names don't match, specify 'left_on' and 'right_on'
df = df.drop('pid', axis=1) # drop the column 'pid' because it is identical to 'id'
print(df)
print(">> This is the desired outcome for the common 'left outer join' operation (equivalent to Excel's vlookup)")

df1:
   id  color
0  11    red
1  12   blue
2  13  green
------------
df3:
   pid part
0   11    A
1   11    B
2   12    C
3   12    B
4   13    C
------------
Merged using left outer join:
   id  color part
0  11    red    A
1  11    red    B
2  12   blue    C
3  12   blue    B
4  13  green    C
>> This is the desired outcome for the common 'left outer join' operation (equivalent to Excel's vlookup)


## Concatenate

In [6]:
print("df1:")
print(df1)

print("------------")
print("df2:")
print(df2)

print("------------")

df = pd.concat([df1, df2], axis=1)
print("Column-wise concat:")
print(df)
print(">> Note the Nan values in the last 2 rows (df1 has 2 more rows of values than df2)")

df1:
   id  color
0  11    red
1  12   blue
2  13  green
------------
df2:
   id   color
0  11     red
1  12   green
2  14  yellow
------------
Column-wise concat:
   id  color  id   color
0  11    red  11     red
1  12   blue  12   green
2  13  green  14  yellow
>> Note the Nan values in the last 2 rows (df1 has 2 more rows of values than df2)


In [7]:
print("Row-wise concat:")
df = pd.concat([df1, df2])
print(df)
print(">> Note that NaN values are added to fill out the dataframe")

print("------------")
print("Reindex the rows:")
df = df.reset_index(drop=True)
print(df)
print(">> Note that index now runs in sequential order")

Row-wise concat:
   id   color
0  11     red
1  12    blue
2  13   green
0  11     red
1  12   green
2  14  yellow
>> Note that NaN values are added to fill out the dataframe
------------
Reindex the rows:
   id   color
0  11     red
1  12    blue
2  13   green
3  11     red
4  12   green
5  14  yellow
>> Note that index now runs in sequential order
