In [1]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Left-hand sample frame: one row per city, keyed by 'city',
# carrying a single 'temprature' value column.
df1 = pd.DataFrame(
    [
        ("new york", 21),
        ("chicago", 14),
        ("orlando", 35),
    ],
    columns=["city", "temprature"],
)

In [3]:
# Inspect df1: three cities, each with a 'temprature' value.
df1

Unnamed: 0,city,temprature
0,new york,21
1,chicago,14
2,orlando,35


In [4]:
# Right-hand sample frame: the same 'city' key column as df1,
# but carrying a 'humidity' value column instead.
df2 = pd.DataFrame(
    [
        ("chicago", 65),
        ("new york", 68),
        ("orlando", 75),
    ],
    columns=["city", "humidity"],
)

In [5]:
# Inspect df2: the same three cities as df1, listed in a different order, with 'humidity'.
df2

Unnamed: 0,city,humidity
0,chicago,65
1,new york,68
2,orlando,75


In [6]:
# Join the two frames on their shared 'city' key column
# (df1 is the left frame, df2 the right one).
df3 = df1.merge(df2, on="city")

In [7]:
# Each city appears once with both 'temprature' and 'humidity': rows were matched
# on 'city' even though the two frames list the cities in different orders.
df3

Unnamed: 0,city,temprature,humidity
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


In [8]:
# When the key column does not contain the same values in both frames, merge
# keeps only the keys found in BOTH frames by default. This is the same as a
# SQL inner join (the intersection of the keys).
df1 = pd.DataFrame(
    [
        ("new york", 21),
        ("chicago", 14),
        ("orlando", 35),
        ("baltimore", 32),
    ],
    columns=["city", "temprature"],
)

In [9]:
# Right-hand frame whose cities only partly overlap with df1:
# 'san francisco' is absent from df1, while 'orlando' and 'baltimore' are absent here.
df2 = pd.DataFrame(
    [
        ("chicago", 65),
        ("new york", 68),
        ("san francisco", 71),
    ],
    columns=["city", "humidity"],
)

In [10]:
# Default merge on 'city' with no `how` argument.
df3 = df1.merge(df2, on="city")

In [11]:
# Only the cities present in both frames remain ('new york' and 'chicago');
# 'orlando', 'baltimore' and 'san francisco' are dropped.
df3

Unnamed: 0,city,temprature,humidity
0,new york,21,68
1,chicago,14,65


In [13]:
# Outer join: keep the cities from both frames, matched or not.
df3 = df1.merge(df2, how="outer", on="city")

In [14]:
# Every city from either frame is kept; where a city is missing from one frame,
# that frame's value column holds a missing value.
df3

Unnamed: 0,city,temprature,humidity
0,baltimore,32.0,
1,chicago,14.0,65.0
2,new york,21.0,68.0
3,orlando,35.0,
4,san francisco,,71.0


In [16]:
# Inner join is the default: passing how="inner" explicitly gives the same
# result as leaving the `how` parameter out.
df3 = df1.merge(df2, how="inner", on="city")

In [17]:
# Same result as the merge without `how`: only the cities common to both frames.
df3

Unnamed: 0,city,temprature,humidity
0,new york,21,68
1,chicago,14,65


In [20]:
# Left join: keep every row of the left frame.
# "Left" and "right" are decided by position in the merge call: the frame
# calling .merge (df1) is the left one, the frame passed in (df2) is the right one.
df3 = df1.merge(df2, how="left", on="city")

In [21]:
# All four df1 cities are kept; 'humidity' is missing for the ones df2 does not
# contain ('orlando' and 'baltimore').
df3

Unnamed: 0,city,temprature,humidity
0,new york,21,68.0
1,chicago,14,65.0
2,orlando,35,
3,baltimore,32,


In [22]:
# Right join: keep every row of the right frame (df2).
df3 = df1.merge(df2, how="right", on="city")

In [23]:
# All three df2 cities are kept; 'temprature' is missing for 'san francisco',
# which df1 does not contain.
df3

Unnamed: 0,city,temprature,humidity
0,chicago,14.0,65
1,new york,21.0,68
2,san francisco,,71


In [25]:
# Outer join with indicator=True: this adds a '_merge' column that records
# which frame each row came from.
df3 = df1.merge(df2, how="outer", on="city", indicator=True)

In [26]:
# The extra '_merge' column reads 'left_only', 'right_only' or 'both' for each row.
df3

Unnamed: 0,city,temprature,humidity,_merge
0,baltimore,32.0,,left_only
1,chicago,14.0,65.0,both
2,new york,21.0,68.0,both
3,orlando,35.0,,left_only
4,san francisco,,71.0,right_only


In [27]:
# Left-hand frame that now carries BOTH value columns ('temprature' and
# 'humidity'), so its non-key column names collide with df2's.
df1 = pd.DataFrame(
    [
        ("new york", 21, 65),
        ("chicago", 14, 68),
        ("orlando", 35, 71),
        ("baltimore", 38, 75),
    ],
    columns=["city", "temprature", "humidity"],
)

In [28]:
# Right-hand frame with the same three column names as df1.
df2 = pd.DataFrame(
    [
        ("chicago", 21, 65),
        ("new york", 14, 68),
        ("san digo", 35, 71),
    ],
    columns=["city", "temprature", "humidity"],
)

In [29]:
# Inspect df2, which now shares the 'temprature' and 'humidity' column names with df1.
df2

Unnamed: 0,city,temprature,humidity
0,chicago,21,65
1,new york,14,68
2,san digo,35,71


In [32]:
# When both frames contain non-key columns with the same name, merge keeps
# both copies and appends a suffix to each name to tell them apart.
df3 = df1.merge(df2, on="city")

In [33]:
# The overlapping columns appear twice: '_x' marks df1's values and '_y' marks df2's.
df3

Unnamed: 0,city,temprature_x,humidity_x,temprature_y,humidity_y
0,new york,21,65,14,68
1,chicago,14,68,21,65


In [34]:
# Replace the default suffixes with custom ones for the overlapping columns:
# the first entry is applied to df1's columns, the second to df2's.
df3 = df1.merge(df2, on="city", suffixes=("_left", "_right"))

In [35]:
# Same rows as with the default suffixes, but the overlapping columns now end
# in '_left' (df1) and '_right' (df2) instead of '_x' and '_y'.
df3

Unnamed: 0,city,temprature_left,humidity_left,temprature_right,humidity_right
0,new york,21,65,14,68
1,chicago,14,68,21,65


In [36]:
# `suffixes` given as a list rather than a tuple.
df3 = df1.merge(df2, on="city", suffixes=["_left", "_right"])

In [37]:
# Identical to the result obtained with the tuple form of `suffixes`.
df3

Unnamed: 0,city,temprature_left,humidity_left,temprature_right,humidity_right
0,new york,21,65,14,68
1,chicago,14,68,21,65
