In [1]:
import numpy as np, pandas as pd

### 下列資料依照指定規則做合併：

```
- 依照 fruit 欄位做合併
- 依照 index 索引做合併
```


In [2]:
# Build the two demo tables. Both use the same column names
# (fruit / weight / price), which is what the later merge/join cells rely on.
# Prices come from np.random.randint and no seed is set in this cell,
# so the price values vary from run to run.

# df1: 9 rows -- three fruits cycled three times, each fruit always paired
# with the same weight label.
df1 = pd.DataFrame(
    dict(
        fruit=['apple', 'banana', 'orange'] * 3,
        weight=['high', 'medium', 'low'] * 3,
        price=np.random.randint(0, 15, 9),
    )
)

# df2: 6 rows -- the fruit list has period 3 but the weight list has period 2,
# so the same fruit shows up with both 'high' and 'low'.
df2 = pd.DataFrame(
    dict(
        fruit=['apple', 'orange', 'pine'] * 2,
        weight=['high', 'low'] * 3,
        price=np.random.randint(0, 15, 6),
    )
)

# Show both frames, one after the other.
print(df1, df2, sep='\n')

    fruit  weight  price
0   apple    high     11
1  banana  medium      2
2  orange     low      1
3   apple    high      1
4  banana  medium      3
5  orange     low      2
6   apple    high     10
7  banana  medium      0
8  orange     low     10
    fruit weight  price
0   apple   high      0
1  orange    low     12
2    pine   high     14
3   apple    low      7
4  orange   high      6
5    pine    low      1


In [3]:
# Merge the two tables on the shared 'fruit' column.
# No `how` argument is given, so pandas applies its default join type.
# The remaining overlapping columns (weight, price) come back with
# _x (from df1) and _y (from df2) suffixes.
print(df1.merge(df2, on='fruit'))

     fruit weight_x  price_x weight_y  price_y
0    apple     high       11     high        0
1    apple     high       11      low        7
2    apple     high        1     high        0
3    apple     high        1      low        7
4    apple     high       10     high        0
5    apple     high       10      low        7
6   orange      low        1      low       12
7   orange      low        1     high        6
8   orange      low        2      low       12
9   orange      low        2     high        6
10  orange      low       10      low       12
11  orange      low       10     high        6


In [4]:
# Join the two tables on their index, without giving any suffixes.
# df1 and df2 use identical column names, so DataFrame.join raises a
# ValueError here. That error is the point of this cell, so it is caught
# and printed instead of being left to abort a "Restart & Run All".
try:
    print(df1.join(df2))
except ValueError as err:
    print('ValueError:', err)

ValueError: columns overlap but no suffix specified: Index(['fruit', 'weight', 'price'], dtype='object')

### 為什麼依照 merge 合併後有些資料不見了？


In [5]:
# pd.merge defaults to how='inner', which keeps only the keys present in
# BOTH tables (the intersection, not the union). 'banana' exists only in
# df1 and 'pine' only in df2, so those rows were dropped above.
# how='outer' keeps the union of keys and fills the missing side with NaN.
print(df1.merge(df2, on='fruit', how='outer'))

     fruit weight_x  price_x weight_y  price_y
0    apple     high     11.0     high      0.0
1    apple     high     11.0      low      7.0
2    apple     high      1.0     high      0.0
3    apple     high      1.0      low      7.0
4    apple     high     10.0     high      0.0
5    apple     high     10.0      low      7.0
6   banana   medium      2.0      NaN      NaN
7   banana   medium      3.0      NaN      NaN
8   banana   medium      0.0      NaN      NaN
9   orange      low      1.0      low     12.0
10  orange      low      1.0     high      6.0
11  orange      low      2.0      low     12.0
12  orange      low      2.0     high      6.0
13  orange      low     10.0      low     12.0
14  orange      low     10.0     high      6.0
15    pine      NaN      NaN     high     14.0
16    pine      NaN      NaN      low      1.0


### 為什麼依照 index 合併會發生錯誤？請用程式解決。

In [6]:
# The index join failed because the column names overlap and no suffix
# was specified. Supplying lsuffix / rsuffix disambiguates the columns
# and resolves the error.
print(
    df1.join(
        other=df2,
        lsuffix='_df1',
        rsuffix='_df2',
    )
)

  fruit_df1 weight_df1  price_df1 fruit_df2 weight_df2  price_df2
0     apple       high         11     apple       high        0.0
1    banana     medium          2    orange        low       12.0
2    orange        low          1      pine       high       14.0
3     apple       high          1     apple        low        7.0
4    banana     medium          3    orange       high        6.0
5    orange        low          2      pine        low        1.0
6     apple       high         10       NaN        NaN        NaN
7    banana     medium          0       NaN        NaN        NaN
8    orange        low         10       NaN        NaN        NaN
