# Merge
- Concat 
- Join   
- Append 

In [1]:
import pandas as pd
import numpy as np

# Six consecutive calendar days starting 2021-07-01 serve as the shared row index.
starting_date = '20210701'
dates_index = pd.date_range(starting_date, periods=6)

# Base frame: the integers 0..23 laid out as 6 rows x 4 columns (A-D).
sample_numpy_data = np.arange(24).reshape(6, 4)
sample_df = pd.DataFrame(sample_numpy_data, index=dates_index, columns=list('ABCD'))

# Ingredients for the extra columns. The Series carries the same date index as
# the frame, so it lines up row-for-row when it is attached below.
sample_series = pd.Series([1, 2, 3, 4, 5, 6], index=dates_index)
second_numpy_array = np.arange(len(sample_df)) * 100 + 7

# Extended copy of the base frame; sample_df itself is left untouched.
sample_df_2 = sample_df.assign(**{
    'Fruits': ['apple', 'orange', 'banana', 'strawberry', 'blueberry', 'pineapple'],
    'Extra Data': sample_series * 3 + 1,
    'G': second_numpy_array,
})

sample_df_2

Unnamed: 0,A,B,C,D,Fruits,Extra Data,G
2021-07-01,0,1,2,3,apple,4,7
2021-07-02,4,5,6,7,orange,7,107
2021-07-03,8,9,10,11,banana,10,207
2021-07-04,12,13,14,15,strawberry,13,307
2021-07-05,16,17,18,19,blueberry,16,407
2021-07-06,20,21,22,23,pineapple,19,507


In [2]:
# Row count of the extended frame (first component of its shape).
sample_df_2.shape[0]

6

### concat()
documentation: https://pandas.pydata.org/docs/reference/api/pandas.concat.html
##### separate data frame into a list with 3 elements

In [3]:
# Cut the frame into three consecutive two-row chunks by position.
pieces = [
    sample_df_2.iloc[:2],
    sample_df_2.iloc[2:4],
    sample_df_2.iloc[4:],
]
pieces

[            A  B  C  D  Fruits  Extra Data    G
 2021-07-01  0  1  2  3   apple           4    7
 2021-07-02  4  5  6  7  orange           7  107,
              A   B   C   D      Fruits  Extra Data    G
 2021-07-03   8   9  10  11      banana          10  207
 2021-07-04  12  13  14  15  strawberry          13  307,
              A   B   C   D     Fruits  Extra Data    G
 2021-07-05  16  17  18  19  blueberry          16  407
 2021-07-06  20  21  22  23  pineapple          19  507]

##### concatenate first and last elements column-wise (rows are aligned on the index; cells with no matching date become NaN)

In [6]:
# Place the first and last chunks side by side; their date indexes do not
# overlap, so each row is filled from only one of the two chunks.
pd.concat([pieces[0], pieces[-1]], axis="columns")

Unnamed: 0,A,B,C,D,Fruits,Extra Data,G,A.1,B.1,C.1,D.1,Fruits.1,Extra Data.1,G.1
2021-07-01,0.0,1.0,2.0,3.0,apple,4.0,7.0,,,,,,,
2021-07-02,4.0,5.0,6.0,7.0,orange,7.0,107.0,,,,,,,
2021-07-05,,,,,,,,16.0,17.0,18.0,19.0,blueberry,16.0,407.0
2021-07-06,,,,,,,,20.0,21.0,22.0,23.0,pineapple,19.0,507.0


### append()
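documentation: https://pandas.pydata.org/pandas-docs/version/1.5/reference/api/pandas.DataFrame.append.html (note: `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0; `pd.concat` is the replacement)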

In [7]:
# Take the third row (position 2) as a Series; it is re-used as the row to add.
new_last_row = sample_df_2.iloc[2, :]
new_last_row

A                  8
B                  9
C                 10
D                 11
Fruits        banana
Extra Data        10
G                207
Name: 2021-07-03 00:00:00, dtype: object

In [9]:
# Add `new_last_row` to the bottom of the frame, returning a new DataFrame and
# leaving sample_df_2 unchanged.
#
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the row
# is appended with pd.concat instead. The Series is turned into a one-row frame
# (to_frame().T) whose index label is the Series name; infer_objects() restores
# the numeric dtypes that the object-typed row Series would otherwise force on
# every column of the result.
pd.concat([sample_df_2, new_last_row.to_frame().T.infer_objects()])


Unnamed: 0,A,B,C,D,Fruits,Extra Data,G
2021-07-01,0,1,2,3,apple,4,7
2021-07-02,4,5,6,7,orange,7,107
2021-07-03,8,9,10,11,banana,10,207
2021-07-04,12,13,14,15,strawberry,13,307
2021-07-05,16,17,18,19,blueberry,16,407
2021-07-06,20,21,22,23,pineapple,19,507
2021-07-03,8,9,10,11,banana,10,207


### merge()
documentation: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.merge.html
        
Merge DataFrame objects by performing a database-style join operation by columns or indexes.

If joining columns on columns, the DataFrame indexes will be ignored. Otherwise if joining indexes on indexes or indexes on a column or columns, the index will be passed on.

In [13]:
# Left-hand input: keys K0..K3 with value columns A and B.
left = pd.DataFrame(dict(
    my_key=['K0', 'K1', 'K2', 'K3'],
    A=['A0', 'A1', 'A2', 'A3'],
    B=['B0', 'B1', 'B2', 'B3'],
))

# Right-hand input: keys are shifted to K1..K4 (rather than K0..K3) so that the
# two frames only share K1, K2 and K3.
right = pd.DataFrame(dict(
    my_key=['K1', 'K2', 'K3', 'K4'],
    C=['C0', 'C1', 'C2', 'C3'],
    D=['D0', 'D1', 'D2', 'D3'],
))

In [11]:
# Display the left-hand merge input (keys K0..K3, value columns A and B).
left

Unnamed: 0,my_key,A,B
0,K0,A0,B0
1,K1,A1,B1
2,K2,A2,B2
3,K3,A3,B3


In [12]:
# Display the right-hand merge input. `right` is defined with keys K1..K4, so a
# saved table that still lists K0..K3 predates that definition — re-run this
# cell after the defining cell to refresh it.
right

Unnamed: 0,my_key,C,D
0,K0,C0,D0
1,K1,C1,D1
2,K2,C2,D2
3,K3,C3,D3


In [14]:
# Database-style join on the shared key column; "inner" (the default) keeps
# only keys present in both frames.
result = left.merge(right, how="inner", on="my_key")
result

Unnamed: 0,my_key,A,B,C,D
0,K1,A1,B1,C0,D0
1,K2,A2,B2,C1,D1
2,K3,A3,B3,C2,D2


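So we see that, in contrast to `pd.concat`, `pd.merge` (with its default `how="inner"`) keeps only the rows whose `my_key` value appears in both frames (K1–K3 here), whereas `pd.concat` keeps the union of both indexes and fills the missing cells with NaN.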