# Merge & Concat

---
Merging and concatenating are most commonly used in SQL environments, but they can also be applied to matrix-like data such as pandas DataFrames.
Let's look at how these operations can be performed.


In [1]:
import numpy as np
import pandas as pd

# Concat

---
*   ### For the following scenarios we are going to create two DataFrames

In [7]:
# First sample frame: columns A-D, each holding the labels "<column><row>" for rows 0-3.
df_1 = pd.DataFrame(
    {col: [f"{col}{row}" for row in range(4)] for col in "ABCD"}
)

In [5]:
# Second sample frame: same columns A-D, continuing the labels with rows 4-7.
df_2 = pd.DataFrame(
    {col: [f"{col}{row}" for row in range(4, 8)] for col in "ABCD"}
)

### Now let's use concat to join both dataframes

In [9]:
# Stack df_2 underneath df_1 (row-wise). ignore_index=True renumbers the
# rows of the result from 0 instead of keeping each frame's own 0-3 index.
pd.concat(objs=[df_1, df_2], axis=0, ignore_index=True)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3
4,A4,B4,C4,D4
5,A5,B5,C5,D5
6,A6,B6,C6,D6
7,A7,B7,C7,D7


In [10]:
# Place the two frames side by side (column-wise), aligning rows on the index.
# The result carries the column labels A-D twice, once per input frame.
pd.concat(objs=[df_1, df_2], axis="columns")

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,A0,B0,C0,D0,A4,B4,C4,D4
1,A1,B1,C1,D1,A5,B5,C5,D5
2,A2,B2,C2,D2,A6,B6,C6,D6
3,A3,B3,C3,D3,A7,B7,C7,D7


# Merge

---

*   Let's create two new DataFrames to perform merge operations

In [11]:
# Left-hand table for the merge examples, written one row per line.
# NOTE: the labels in the first row end in the capital letter "O"
# ('KO', 'AO', 'BO'), not the digit zero; `right` must use the same
# spelling for its keys to match.
left = pd.DataFrame(
    [
        ('KO', 'AO', 'BO'),
        ('K1', 'A1', 'B1'),
        ('K2', 'A2', 'B2'),
        ('K3', 'A3', 'B3'),
    ],
    columns=['Key', 'A', 'B'],
)

In [12]:
# Right-hand table for the merge examples, written one row per line.
# NOTE: the labels in the first row end in the capital letter "O"
# ('KO', 'CO', 'DO'), not the digit zero; the 'Key' values must stay
# spelled like those in `left` for the rows to match.
right = pd.DataFrame(
    [
        ('KO', 'CO', 'DO'),
        ('K1', 'C1', 'D1'),
        ('K2', 'C2', 'D2'),
        ('K3', 'C3', 'D3'),
    ],
    columns=['Key', 'C', 'D'],
)

### Let's join the Right DataFrame to the Left one

In [13]:
# No join column is named, so pandas joins on the column labels the two
# frames have in common.
pd.merge(left, right)

Unnamed: 0,Key,A,B,C,D
0,KO,AO,BO,CO,DO
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


### Sometimes it is better to specify the key column explicitly, to keep the code cleaner

In [15]:
# Same join as before, but with the join column named explicitly.
pd.merge(left, right, on='Key')

Unnamed: 0,Key,A,B,C,D
0,KO,AO,BO,CO,DO
1,K1,A1,B1,C1,D1
2,K2,A2,B2,C2,D2
3,K3,A3,B3,C3,D3


### Let's set up the previous DataFrames again; this time, however, I'll give the key column a different name in the second frame

In [16]:
# Rebuild both tables, one row per line. `left` keeps its 'Key' column,
# while the key column of `right` is now called 'Key_2'.
# NOTE: the labels in the first row end in the capital letter "O", not zero.
left = pd.DataFrame(
    [
        ('KO', 'AO', 'BO'),
        ('K1', 'A1', 'B1'),
        ('K2', 'A2', 'B2'),
        ('K3', 'A3', 'B3'),
    ],
    columns=['Key', 'A', 'B'],
)

right = pd.DataFrame(
    [
        ('KO', 'CO', 'DO'),
        ('K1', 'C1', 'D1'),
        ('K2', 'C2', 'D2'),
        ('K3', 'C3', 'D3'),
    ],
    columns=['Key_2', 'C', 'D'],
)

### Let's repeat the previous exercise, but now using `left_on` and `right_on` to reference the exact key column of each DataFrame, so that the matching rows can be retrieved

In [17]:
# The key columns have different names, so each side's join column is
# named separately. Both 'Key' and 'Key_2' appear in the result.
pd.merge(left, right, left_on='Key', right_on='Key_2')

Unnamed: 0,Key,A,B,Key_2,C,D
0,KO,AO,BO,KO,CO,DO
1,K1,A1,B1,K1,C1,D1
2,K2,A2,B2,K2,C2,D2
3,K3,A3,B3,K3,C3,D3


### Let's move forward and change these DataFrames one more time

In [23]:
# Rebuild both tables, one row per line. `left` is unchanged; the last row
# of `right` now has a missing key (NaN) in 'Key_2' instead of 'K3'.
# NOTE: the labels in the first row end in the capital letter "O", not zero.
left = pd.DataFrame(
    [
        ('KO', 'AO', 'BO'),
        ('K1', 'A1', 'B1'),
        ('K2', 'A2', 'B2'),
        ('K3', 'A3', 'B3'),
    ],
    columns=['Key', 'A', 'B'],
)

right = pd.DataFrame(
    [
        ('KO', 'CO', 'DO'),
        ('K1', 'C1', 'D1'),
        ('K2', 'C2', 'D2'),
        (np.nan, 'C3', 'D3'),
    ],
    columns=['Key_2', 'C', 'D'],
)

In [24]:
# Display the frame to confirm that the last row of 'Key_2' is missing (NaN).
right

Unnamed: 0,Key_2,C,D
0,KO,CO,DO
1,K1,C1,D1
2,K2,C2,D2
3,,C3,D3


### The `how=` parameter controls which rows are kept. With `how='left'`, every row of the left DataFrame is shown, even though "Key_2" has a NaN value and therefore no match for that row.

In [26]:
# Left join: every row of `left` is kept; where `right` has no matching
# 'Key_2', the columns coming from `right` are NaN.
pd.merge(left, right, how='left', left_on='Key', right_on='Key_2')

Unnamed: 0,Key,A,B,Key_2,C,D
0,KO,AO,BO,KO,CO,DO
1,K1,A1,B1,K1,C1,D1
2,K2,A2,B2,K2,C2,D2
3,K3,A3,B3,,,


### `inner` is the default value of `how`: only the rows whose keys match in both DataFrames are kept

In [27]:
# Inner join: only rows whose 'Key' and 'Key_2' values match are kept.
pd.merge(left, right, how='inner', left_on='Key', right_on='Key_2')

Unnamed: 0,Key,A,B,Key_2,C,D
0,KO,AO,BO,KO,CO,DO
1,K1,A1,B1,K1,C1,D1
2,K2,A2,B2,K2,C2,D2


### `right` keeps every row of the right DataFrame. Since the left DataFrame has no key that matches the NaN in "Key_2", the columns coming from the left DataFrame are filled with NaN for that row.

In [28]:
# Right join: every row of `right` is kept; where `left` has no matching
# 'Key', the columns coming from `left` are NaN.
pd.merge(left, right, how='right', left_on='Key', right_on='Key_2')

Unnamed: 0,Key,A,B,Key_2,C,D
0,KO,AO,BO,KO,CO,DO
1,K1,A1,B1,K1,C1,D1
2,K2,A2,B2,K2,C2,D2
3,,,,,C3,D3
