In [2]:
# Pandas
import pandas as pd

In [3]:
# Concat
# Glue data frames together along one axis: rows (the default) or columns (axis=1).

data1 = pd.DataFrame(
    {"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]},
    index=[1, 2, 3, 4],
)

data2 = pd.DataFrame(
    {"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]},
    index=[5, 6, 7, 8],
)

# Emit both results in a single call, one item per line, with a dashed rule between them.
print(
    # Default axis: the rows of data2 are appended below the rows of data1.
    pd.concat([data1, data2]),
    "-" * 30,
    # axis=1: frames are placed side by side and aligned on the index.
    # The two indexes share no labels here, so the gaps are filled with NaN.
    pd.concat([data1, data2], axis=1),
    sep="\n",
)

   A  B
1  1  5
2  2  6
3  3  7
4  4  8
5  1  5
6  2  6
7  3  7
8  4  8
------------------------------
     A    B    A    B
1  1.0  5.0  NaN  NaN
2  2.0  6.0  NaN  NaN
3  3.0  7.0  NaN  NaN
4  4.0  8.0  NaN  NaN
5  NaN  NaN  1.0  5.0
6  NaN  NaN  2.0  6.0
7  NaN  NaN  3.0  7.0
8  NaN  NaN  4.0  8.0


In [14]:

# Merge
# Combine data frames on a column that both of them contain, matching rows by equal values.
# Comparable to joining on a foreign key in SQL -> the join can be outer or inner, as in SQL.

data1 = pd.DataFrame(
    {"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]},
    index=[1, 2, 3, 4],
)

data2 = pd.DataFrame(
    {"A": [1, 2, 5, 6], "D": [5, 6, 7, 8]},
    index=[5, 6, 7, 8],
)

# Emit the three results in a single call, one item per line, with dashed rules between them.
print(
    # No `how` given: the result is the same as the explicit inner join below.
    pd.merge(data1, data2, on="A"),
    "-" * 30,
    # Outer: union of both frames (every value of 'A' that occurs), missing values are NaN.
    pd.merge(data1, data2, on="A", how="outer"),
    "-" * 30,
    # Inner: only the intersection between the two frames (values of 'A' present in both).
    pd.merge(data1, data2, on="A", how="inner"),
    sep="\n",
)

   A  B  D
0  1  5  5
1  2  6  6
------------------------------
   A    B    D
0  1  5.0  5.0
1  2  6.0  6.0
2  3  7.0  NaN
3  4  8.0  NaN
4  5  NaN  7.0
5  6  NaN  8.0
------------------------------
   A  B  D
0  1  5  5
1  2  6  6


In [15]:

# Join
# Unlike concat/merge, join is not a top-level pandas function -> it is a method of the data frame object.
# It combines data frames by index label instead of by a shared column (similar to merge otherwise).
# As with merge, the join can be outer or inner, as in SQL.

data1 = pd.DataFrame(
    {"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]},
    index=[1, 2, 3, 4],
)

data2 = pd.DataFrame(
    {"C": [1, 2, 5, 6], "D": [5, 6, 7, 8]},
    index=[1, 3, 5, 7],
)

# Emit the three results in a single call, one item per line, each followed by a dashed rule.
print(
    # No `how` given: every row of data1 is kept; labels missing from data2 get NaN.
    data1.join(data2),
    "-" * 30,
    # Outer: every index label found in either frame.
    data1.join(data2, how="outer"),
    "-" * 30,
    # Inner: only the index labels found in both frames.
    data1.join(data2, how="inner"),
    "-" * 30,
    sep="\n",
)

   A  B    C    D
1  1  5  1.0  5.0
2  2  6  NaN  NaN
3  3  7  2.0  6.0
4  4  8  NaN  NaN
------------------------------
     A    B    C    D
1  1.0  5.0  1.0  5.0
2  2.0  6.0  NaN  NaN
3  3.0  7.0  2.0  6.0
4  4.0  8.0  NaN  NaN
5  NaN  NaN  5.0  7.0
7  NaN  NaN  6.0  8.0
------------------------------
   A  B  C  D
1  1  5  1  5
3  3  7  2  6
------------------------------
