In [1]:
import pandas as pd

# Left frame: join keys live in `lkey` (note that 'foo' appears twice).
df1 = pd.DataFrame(
    data={
        'lkey': ['foo', 'bar', 'baz', 'foo'],
        'value': [1, 2, 3, 5],
    }
)

# Right frame: the same keys under the name `rkey`, with its own `value` column.
df2 = pd.DataFrame(
    data={
        'rkey': ['foo', 'bar', 'baz', 'foo'],
        'value': [5, 6, 7, 8],
    }
)

In [2]:
# Plain-text dump of the left frame so its columns are visible before merging.
print(df1)

  lkey  value
0  foo      1
1  bar      2
2  baz      3
3  foo      5


In [3]:
# Bare trailing expression: the notebook shows the right frame as the cell's output.
df2

Unnamed: 0,rkey,value
0,foo,5
1,bar,6
2,baz,7
3,foo,8


There are two columns in each dataframe. Let's combine them based on `lkey` and `rkey`:

In [4]:
# Inner join pairing each `lkey` in df1 with every matching `rkey` in df2.
# Both frames also carry a non-key `value` column, so the result keeps both
# copies and tells them apart with suffixes.
df_merged = df1.merge(
    df2,
    how='inner',
    left_on='lkey',
    right_on='rkey',
)

print(df_merged)

  lkey  value_x rkey  value_y
0  foo        1  foo        5
1  foo        1  foo        8
2  foo        5  foo        5
3  foo        5  foo        8
4  bar        2  bar        6
5  baz        3  baz        7


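What is/are the overlapping column(s)? `value` — it exists in both frames and is not a join key, so pandas keeps both copies and renames them with the default suffixes `_x` (left) and `_y` (right).<br>
`left_on` indicates the column to join on in the left dataframe; `right_on` does the same for the right dataframe.<br>
`how = 'inner'` is the default behavior. It uses the intersection of keys from both frames.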

Now, let's combine them based on `value`:

In [6]:
# The join key has the same name in both frames, so `on=` is sufficient;
# there is no need to pass identical `left_on` / `right_on` values.
# Only rows whose `value` occurs in both frames survive the inner join.
df_merged = pd.merge(left=df1, right=df2, how='inner', on='value')

print(df_merged)

  lkey  value rkey
0  foo      5  foo


Let's merge two datasets on their index instead of on a column:

In [27]:
# Two small frames that share a `name` key. The key is moved into the index
# as part of construction rather than by mutating the frame with `inplace=True`.
df1 = pd.DataFrame({'name': ['foo', 'bar', 'baz'],
                    'attribute_1': [1, 2, 3]}).set_index('name')
# Exercise: add another "foo" row to df1 and see how the result changes.

df2 = pd.DataFrame({'name': ['foo', 'bar', 'baz'],
                    'attribute_2': [5, 6, 7]}).set_index('name')

# `left_index=True` / `right_index=True` use each frame's index as its join
# key, so rows are matched on `name` without naming a column.
df_merged = pd.merge(left=df1, right=df2, left_index=True, right_index=True)

print(df_merged)

      attribute_1  attribute_2
name                          
foo             1            5
bar             2            6
baz             3            7


Is there a way to sort data?

In [29]:
# `name` is the index of df_merged, not a column, so sort on the index directly.
# A new frame is returned; df_merged itself keeps its original row order.
df_merged_sorted = df_merged.sort_index()

In [30]:
# Show the sorted copy; rows now appear in alphabetical order of `name`.
print(df_merged_sorted)

      attribute_1  attribute_2
name                          
bar             2            6
baz             3            7
foo             1            5
