# Merge Dataframes

In [1]:
%autosave 60

Autosaving every 60 seconds


In [2]:
import pandas as pd
# Temperature readings, one row per city; used as the left-hand frame in the first merge.
df1 = pd.DataFrame(
    [
        ("new york", 21),
        ("chicago", 14),
        ("orlando", 35),
    ],
    columns=["city", "temperature"],
)
df1

Unnamed: 0,city,temperature
0,new york,21
1,chicago,14
2,orlando,35


In [3]:
# Humidity readings for the same three cities, listed in a different row order than df1.
df2 = pd.DataFrame(
    [
        ("chicago", 65),
        ("new york", 68),
        ("orlando", 75),
    ],
    columns=["city", "humidity"],
)
df2

Unnamed: 0,city,humidity
0,chicago,65
1,new york,68
2,orlando,75


In [6]:
# Match rows of df1 and df2 on the "city" key; row order in the two frames does not matter.
df3 = df1.merge(df2, on="city")
df3

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


In [7]:
# Temperature frame that includes "baltimore", a city absent from the humidity frame df5.
df4 = pd.DataFrame(
    [
        ("new york", 21),
        ("chicago", 14),
        ("orlando", 35),
        ("baltimore", 32),
    ],
    columns=["city", "temperature"],
)
df4

Unnamed: 0,city,temperature
0,new york,21
1,chicago,14
2,orlando,35
3,baltimore,32


In [8]:
# Humidity frame that includes "san francisco", a city absent from the temperature frame df4.
df5 = pd.DataFrame(
    [
        ("chicago", 65),
        ("new york", 68),
        ("san francisco", 71),
    ],
    columns=["city", "humidity"],
)
df5

Unnamed: 0,city,humidity
0,chicago,65
1,new york,68
2,san francisco,71


In [12]:
# Inner join (the default): only cities present in both frames are kept.
# No `on` is given, so the key is the column the two frames share.
df6 = df4.merge(df5, how="inner")
df6

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65


In [13]:
# Outer join: keep every city from either frame; values missing on one side become NaN.
# (Omitting `how` would fall back to the default, "inner".)
df6 = df4.merge(df5, how="outer")
df6

Unnamed: 0,city,temperature,humidity
0,new york,21.0,68.0
1,chicago,14.0,65.0
2,orlando,35.0,
3,baltimore,32.0,
4,san francisco,,71.0


![Screenshot%202021-10-05%20094938.png](attachment:Screenshot%202021-10-05%20094938.png)

In [14]:
# Left join: keep every city from df4; humidity is NaN where df5 has no matching city.
df6 = df4.merge(df5, how="left")
df6

Unnamed: 0,city,temperature,humidity
0,new york,21,68.0
1,chicago,14,65.0
2,orlando,35,
3,baltimore,32,


**Use the `indicator` flag in a merge**

In [16]:
# Outer join with indicator=True: the extra "_merge" column records whether each
# row came from both frames, only the left one, or only the right one.
df6 = df4.merge(df5, how="outer", indicator=True)
df6

Unnamed: 0,city,temperature,humidity,_merge
0,new york,21.0,68.0,both
1,chicago,14.0,65.0,both
2,orlando,35.0,,left_only
3,baltimore,32.0,,left_only
4,san francisco,,71.0,right_only


**How to use the `suffixes` argument when merging dataframes**

In [17]:
# Left-hand frame for the suffixes demo: it shares the non-key columns
# "temperature" and "humidity" with df2.
df1 = pd.DataFrame(
    [
        ("new york", 21, 65),
        ("chicago", 14, 68),
        ("orlando", 35, 71),
        ("baltimore", 38, 75),
    ],
    columns=["city", "temperature", "humidity"],
)
df1

Unnamed: 0,city,temperature,humidity
0,new york,21,65
1,chicago,14,68
2,orlando,35,71
3,baltimore,38,75


In [18]:
# Right-hand frame for the suffixes demo: same column names as df1, different cities and values.
df2 = pd.DataFrame(
    [
        ("chicago", 21, 65),
        ("new york", 14, 68),
        ("san diego", 35, 71),
    ],
    columns=["city", "temperature", "humidity"],
)
df2

Unnamed: 0,city,temperature,humidity
0,chicago,21,65
1,new york,14,68
2,san diego,35,71


In [19]:
# Both frames have "temperature" and "humidity", so the merged columns are
# disambiguated with the default "_x" (left) / "_y" (right) suffixes.
df3 = df1.merge(df2, on="city")
df3

Unnamed: 0,city,temperature_x,humidity_x,temperature_y,humidity_y
0,new york,21,65,14,68
1,chicago,14,68,21,65


In [22]:
# Outer join on "city", replacing the default suffixes with more descriptive
# "_left" / "_right" labels for the overlapping columns.
overlap_suffixes = ("_left", "_right")
df3 = df1.merge(df2, how="outer", on="city", suffixes=overlap_suffixes)
df3

Unnamed: 0,city,temperature_left,humidity_left,temperature_right,humidity_right
0,new york,21.0,65.0,14.0,68.0
1,chicago,14.0,68.0,21.0,65.0
2,orlando,35.0,71.0,,
3,baltimore,38.0,75.0,,
4,san diego,,,35.0,71.0


In [23]:
# Temperature frame indexed by city. DataFrame.join aligns rows on the index,
# so the key column is moved into the index here.
# set_index is chained instead of using inplace=True, so df1 is created in a
# single expression and never mutated after construction.
df1 = pd.DataFrame({
    "city": ["new york", "chicago", "orlando"],
    "temperature": [21, 14, 35],
}).set_index("city")
df1

Unnamed: 0_level_0,temperature
city,Unnamed: 1_level_1
new york,21
chicago,14
orlando,35


In [30]:

# Second city-indexed frame. It repeats the "temperature" column name (with
# different values) and adds "humidity", so joining it to another frame that
# has "temperature" needs suffixes or a column selection.
# set_index is chained instead of using inplace=True, so df2 is created in a
# single expression and never mutated after construction.
df2 = pd.DataFrame({
    "city": ["chicago", "new york", "orlando"],
    "temperature": [10, 20, 30],
    "humidity": [65, 68, 75],
}).set_index("city")
df2

Unnamed: 0_level_0,temperature,humidity
city,Unnamed: 1_level_1,Unnamed: 2_level_1
chicago,10,65
new york,20,68
orlando,30,75


In [31]:
# Index-aligned join; the overlapping "temperature" column is disambiguated by
# suffixing the left frame's copy with "_l" and the right frame's with "_r".
df1.join(other=df2, lsuffix="_l", rsuffix="_r")

Unnamed: 0_level_0,temperature_l,temperature_r,humidity
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
new york,21,20,68
chicago,14,10,65
orlando,35,30,75


In [28]:
# df2 also carries a "temperature" column, so a bare df1.join(df2) raises
# "columns overlap but no suffix specified" on a fresh run. Join only the
# non-overlapping "humidity" column to add it next to df1's temperature.
df1.join(df2[["humidity"]])

Unnamed: 0_level_0,temperature,humidity
city,Unnamed: 1_level_1,Unnamed: 2_level_1
new york,21,68
chicago,14,65
orlando,35,75


<div class = "alert alert-block alert-success" >
<b> Successfully merged DataFrames </b>
</div>