## Combining datasets
Pandas provides various facilities for easily combining Series or DataFrames together.
- **Concat:** combining DataFrames across rows or columns.
- **Join:** combining data on a key column or an index.
- **Merge:** combining data on common columns or indexes.

In [1]:
from helpers import sample_df, hdisplay, nowrap_display
import pandas as pd

In [11]:
# Sample data: two frames built over the same "A0".."D3" range; only the
# value prefix ("L_" vs "R_") tells them apart.
left, right = (sample_df("A0", "D3", prefix=side) for side in ("L_", "R_"))

# Show both inputs with their captions.
hdisplay([left, right], ["Left", "Right"])

Unnamed: 0,A,B,C,D
0,L_A0,L_B0,L_C0,L_D0
1,L_A1,L_B1,L_C1,L_D1
2,L_A2,L_B2,L_C2,L_D2
3,L_A3,L_B3,L_C3,L_D3

Unnamed: 0,A,B,C,D
0,R_A0,R_B0,R_C0,R_D0
1,R_A1,R_B1,R_C1,R_D1
2,R_A2,R_B2,R_C2,R_D2
3,R_A3,R_B3,R_C3,R_D3


In [16]:
# Concatenate the same pair along each axis in turn:
# axis="index" stacks the rows, axis="columns" puts the frames next to each other.
hdisplay(
    [pd.concat([left, right], axis=axis) for axis in ("index", "columns")],
    ["axis='index'", "axis='columns'"],
)

Unnamed: 0,A,B,C,D
0,L_A0,L_B0,L_C0,L_D0
1,L_A1,L_B1,L_C1,L_D1
2,L_A2,L_B2,L_C2,L_D2
3,L_A3,L_B3,L_C3,L_D3
0,R_A0,R_B0,R_C0,R_D0
1,R_A1,R_B1,R_C1,R_D1
2,R_A2,R_B2,R_C2,R_D2
3,R_A3,R_B3,R_C3,R_D3

Unnamed: 0,A,B,C,D,A.1,B.1,C.1,D.1
0,L_A0,L_B0,L_C0,L_D0,R_A0,R_B0,R_C0,R_D0
1,L_A1,L_B1,L_C1,L_D1,R_A1,R_B1,R_C1,R_D1
2,L_A2,L_B2,L_C2,L_D2,R_A2,R_B2,R_C2,R_D2
3,L_A3,L_B3,L_C3,L_D3,R_A3,R_B3,R_C3,R_D3


In [26]:
# Effect of ignore_index on the labels of the concatenation axis.
# Alternatives applied after concatenating:
#   pd.concat([left, right]).reset_index(drop=True)  -> fresh 0..n-1 row labels
#   pd.concat([left, right]).set_index("C")          -> column C becomes the index
hdisplay(
    [
        # Original row labels are kept, so 0..3 appears twice.
        pd.concat([left, right], ignore_index=False),
        # Row labels are discarded and renumbered 0..7.
        pd.concat([left, right], ignore_index=True),
        # Along the columns axis it is the column labels that get renumbered.
        pd.concat([left, right], axis="columns", ignore_index=True),
    ],
    [
        "ignore_index=False",
        "ignore_index=True",
        "axis='columns', ignore_index=True",
    ],
)

Unnamed: 0,A,B,C,D
0,L_A0,L_B0,L_C0,L_D0
1,L_A1,L_B1,L_C1,L_D1
2,L_A2,L_B2,L_C2,L_D2
3,L_A3,L_B3,L_C3,L_D3
0,R_A0,R_B0,R_C0,R_D0
1,R_A1,R_B1,R_C1,R_D1
2,R_A2,R_B2,R_C2,R_D2
3,R_A3,R_B3,R_C3,R_D3

Unnamed: 0,A,B,C,D
0,L_A0,L_B0,L_C0,L_D0
1,L_A1,L_B1,L_C1,L_D1
2,L_A2,L_B2,L_C2,L_D2
3,L_A3,L_B3,L_C3,L_D3
4,R_A0,R_B0,R_C0,R_D0
5,R_A1,R_B1,R_C1,R_D1
6,R_A2,R_B2,R_C2,R_D2
7,R_A3,R_B3,R_C3,R_D3

Unnamed: 0,0,1,2,3,4,5,6,7
0,L_A0,L_B0,L_C0,L_D0,R_A0,R_B0,R_C0,R_D0
1,L_A1,L_B1,L_C1,L_D1,R_A1,R_B1,R_C1,R_D1
2,L_A2,L_B2,L_C2,L_D2,R_A2,R_B2,R_C2,R_D2
3,L_A3,L_B3,L_C3,L_D3,R_A3,R_B3,R_C3,R_D3


In [27]:
# New sample data: the right frame now spans "C2".."F5", so the two frames
# overlap only partially (shared columns C and D, shared row labels 2 and 3).
left, right = (
    sample_df("A0", "D3", prefix="L_"),
    sample_df("C2", "F5", prefix="R_"),
)

# Show both inputs with their captions.
hdisplay([left, right], ["Left", "Right"])

Unnamed: 0,A,B,C,D
0,L_A0,L_B0,L_C0,L_D0
1,L_A1,L_B1,L_C1,L_D1
2,L_A2,L_B2,L_C2,L_D2
3,L_A3,L_B3,L_C3,L_D3

Unnamed: 0,C,D,E,F
2,R_C2,R_D2,R_E2,R_F2
3,R_C3,R_D3,R_E3,R_F3
4,R_C4,R_D4,R_E4,R_F4
5,R_C5,R_D5,R_E5,R_F5


In [39]:
# How concat aligns partially overlapping frames.
hdisplay(
    [
        # Rows stacked; columns missing on one side are left empty (NaN).
        pd.concat([left, right], axis="index"),
        # Outer join (the default): union of the row labels.
        pd.concat([left, right], axis="columns", join="outer"),
        # Inner join: intersection, i.e. only row labels present in both frames.
        pd.concat([left, right], axis="columns", join="inner"),
    ],
    [
        "axis='index'",
        "axis='columns', join='outer'",
        "axis='columns', join='inner'",
    ],
    20,  # NOTE(review): third positional hdisplay argument; meaning not visible here, confirm in helpers
)

Unnamed: 0,A,B,C,D,E,F
0,L_A0,L_B0,L_C0,L_D0,,
1,L_A1,L_B1,L_C1,L_D1,,
2,L_A2,L_B2,L_C2,L_D2,,
3,L_A3,L_B3,L_C3,L_D3,,
2,,,R_C2,R_D2,R_E2,R_F2
3,,,R_C3,R_D3,R_E3,R_F3
4,,,R_C4,R_D4,R_E4,R_F4
5,,,R_C5,R_D5,R_E5,R_F5

Unnamed: 0,A,B,C,D,C.1,D.1,E,F
0,L_A0,L_B0,L_C0,L_D0,,,,
1,L_A1,L_B1,L_C1,L_D1,,,,
2,L_A2,L_B2,L_C2,L_D2,R_C2,R_D2,R_E2,R_F2
3,L_A3,L_B3,L_C3,L_D3,R_C3,R_D3,R_E3,R_F3
4,,,,,R_C4,R_D4,R_E4,R_F4
5,,,,,R_C5,R_D5,R_E5,R_F5

Unnamed: 0,A,B,C,D,C.1,D.1,E,F
2,L_A2,L_B2,L_C2,L_D2,R_C2,R_D2,R_E2,R_F2
3,L_A3,L_B3,L_C3,L_D3,R_C3,R_D3,R_E3,R_F3


In [58]:
# keys=... adds an outer label level on the concatenation axis, recording
# which source frame each row (axis="index") or column (axis="columns") came from.
hdisplay(
    [
        pd.concat([left, right], axis=axis, keys=["left", "right"])
        for axis in ("index", "columns")
    ],
    [
        "axis='index', keys=['left', 'right']",
        "axis='columns', keys=['left', 'right']",
    ],
)

Unnamed: 0,Unnamed: 1,A,B,C,D,E,F
left,0,L_A0,L_B0,L_C0,L_D0,,
left,1,L_A1,L_B1,L_C1,L_D1,,
left,2,L_A2,L_B2,L_C2,L_D2,,
left,3,L_A3,L_B3,L_C3,L_D3,,
right,2,,,R_C2,R_D2,R_E2,R_F2
right,3,,,R_C3,R_D3,R_E3,R_F3
right,4,,,R_C4,R_D4,R_E4,R_F4
right,5,,,R_C5,R_D5,R_E5,R_F5

Unnamed: 0_level_0,left,left,left,left,right,right,right,right
Unnamed: 0_level_1,A,B,C,D,C,D,E,F
0,L_A0,L_B0,L_C0,L_D0,,,,
1,L_A1,L_B1,L_C1,L_D1,,,,
2,L_A2,L_B2,L_C2,L_D2,R_C2,R_D2,R_E2,R_F2
3,L_A3,L_B3,L_C3,L_D3,R_C3,R_D3,R_E3,R_F3
4,,,,,R_C4,R_D4,R_E4,R_F4
5,,,,,R_C5,R_D5,R_E5,R_F5
