In [1]:
import pandas as pd
import numpy as np

# pd.concat

```python
pd.concat(
    objs,
    axis=0,
    join='outer',
    join_axes=None,
    ignore_index=False,
    keys=None,
    levels=None,
    names=None,
    verify_integrity=False,
    sort=None,
    copy=True,
)
```

In [3]:
# Scores for school A: one row per student, one column per subject.
school_A = pd.DataFrame(
    {'Math': [1, 3], 'Physic': [2, 4]},
    index=['Linda', 'Alex'],
)
school_A

Unnamed: 0,Math,Physic
Linda,1,2
Alex,3,4


In [4]:
# Scores for school B: same subjects as school A, different students.
school_B = pd.DataFrame(
    {'Math': [5, 7], 'Physic': [6, 8]},
    index=['Ryan', 'John'],
)
school_B

Unnamed: 0,Math,Physic
Ryan,5,6
John,7,8


In [8]:
# Scores for school C; the label 'Ryan' is also used as a row label in school_B.
school_C = pd.DataFrame(
    {'Math': [5, 3], 'Physic': [2, 10]},
    index=['Ryan', 'Peter'],
)
school_C

Unnamed: 0,Math,Physic
Ryan,5,2
Peter,3,10


In [5]:
# Vertical merge: stack the rows of school_B underneath those of school_A.
# ignore_index is left at its default here, so the original row labels are kept;
# passing ignore_index=True would instead relabel the rows 0, 1, ..., n - 1.
pd.concat([school_A, school_B], axis=0)

Unnamed: 0,Math,Physic
Linda,1,2
Alex,3,4
Ryan,5,6
John,7,8


## ignore_index

In [13]:
# ignore_index=True discards the student-name labels and numbers the rows 0..n-1.
pd.concat(
    [school_A, school_B],
    axis=0,
    ignore_index=True,
)


Unnamed: 0,Math,Physic
0,1,2
1,3,4
2,5,6
3,7,8


## verify_integrity

In [12]:
# verify_integrity=True makes concat raise ValueError when the concatenated
# axis would contain duplicate labels ('Ryan' appears in both school_C and school_B).
try:
    pd.concat((school_C, school_B), axis = 0, verify_integrity=True)
except ValueError:
    # Catch only the expected duplicate-label error; a bare `except:` would also
    # hide unrelated problems such as a NameError or a KeyboardInterrupt.
    print('Duplicated index error when set verify_integrity = True')
finally:
    # With the check disabled the duplicate label is simply kept in the result.
    print('turn off verify_integrity')
    print(pd.concat((school_C, school_B), verify_integrity = False))

Duplicated index error when set verify_integrity = True
turn off verify_integrity
       Math  Physic
Ryan      5       2
Peter     3      10
Ryan      5       6
John      7       8


## join

In [15]:
# Seed NumPy's global RNG (no random draw happens in this cell itself).
np.random.seed(101)

# Single-row frame with Trung's scores in three subjects.
Trung = pd.DataFrame({'Math': [8.4], 'Physic': [7.25], 'English': [9]})
Trung

Unnamed: 0,Math,Physic,English
0,8.4,7.25,9


In [14]:
# Single-row frame with Bich's scores; only 'Math' overlaps with Trung's subjects.
Bich = pd.DataFrame({'Math': [7], 'Chemical': [6.5], 'Biology': [8]})
Bich

Unnamed: 0,Math,Chemical,Biology
0,7,6.5,8


In [16]:
# Inner join: keep only the columns that both frames have in common.
pd.concat(
    [Trung, Bich],
    axis=0,
    join='inner',
)

Unnamed: 0,Math
0,8.4
0,7.0


In [19]:
# Outer join: keep the union of the columns; sort=True orders the column labels.
pd.concat(
    [Trung, Bich],
    axis=0,
    join='outer',
    sort=True,
)

Unnamed: 0,Biology,Chemical,English,Math,Physic
0,,,9.0,8.4,7.25
0,8.0,6.5,,7.0,


## join_axes

In [21]:
# Rebind Trung to a new single-row frame covering four subjects.
Trung = pd.DataFrame({'Math': [1], 'Biology': [2], 'English': [3], 'Physic': [4]})
Trung

Unnamed: 0,Math,Biology,English,Physic
0,1,2,3,4


In [22]:
# Single-row frame with Kien's scores in four subjects.
Kien = pd.DataFrame({'Math': [5], 'English': [6], 'Physic': [7], 'Chemical': [8]})
Kien

Unnamed: 0,Math,English,Physic,Chemical
0,5,6,7,8


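`join_axes`: specific indexes to use for the other n - 1 axes instead of performing
    inner/outer set logic. Note: this keyword was deprecated in pandas 0.25 and removed in pandas 1.0; reindexing the inputs (or the result) with `.reindex()` gives the same effect.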

In [23]:
# Only merge vertically on 2 columns: Math and English.
# The `join_axes` keyword of pd.concat was deprecated in pandas 0.25 and removed
# in 1.0, so each input is reindexed to the wanted columns before concatenating.
# This yields the same frame and works on both old and current pandas.

columns = pd.Index(['Math', 'English'])
pd.concat([frame.reindex(columns=columns) for frame in (Trung, Kien)])

Unnamed: 0,Math,English
0,1,3
0,5,6


## keys

In [28]:
# Redisplay school_A as a reminder of its contents.
school_A

Unnamed: 0,Math,Physic
Linda,1,2
Alex,3,4


In [29]:
# Redisplay school_B as a reminder of its contents.
school_B

Unnamed: 0,Math,Physic
Ryan,5,6
John,7,8


In [32]:
# keys adds an outer index level naming the source frame of each row,
# so the result carries a two-level MultiIndex.
pd.concat(
    [school_A, school_B],
    axis=0,
    keys=['school A', 'school B'],
)

Unnamed: 0,Unnamed: 1,Math,Physic
school A,Linda,1,2
school A,Alex,3,4
school B,Ryan,5,6
school B,John,7,8


## names

In [34]:
# names labels the index levels: the outer level created by keys, then the original one.
pd.concat(
    [school_A, school_B],
    keys=['school A', 'school B'],
    names=['school name', 'student name'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Math,Physic
school name,student name,Unnamed: 2_level_1,Unnamed: 3_level_1
school A,Linda,1,2
school A,Alex,3,4
school B,Ryan,5,6
school B,John,7,8


# DataFrame.append

```python
DataFrame.append(
    other,
    ignore_index=False,
    verify_integrity=False,
    sort=None,
)
```

<code><b>equivalent: pd.concat((DataFrame, other), axis = 0)</b></code>

In [39]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
# pd.concat along axis 0 is its replacement and produces the same frame.
pd.concat([school_A, school_B])

Unnamed: 0,Math,Physic
Linda,1,2
Alex,3,4
Ryan,5,6
John,7,8


In [40]:
#ignore_index
# DataFrame.append was removed in pandas 2.0; pd.concat accepts the same
# ignore_index flag and relabels the rows 0, 1, ..., n - 1.
pd.concat([school_A, school_B], ignore_index = True)

Unnamed: 0,Math,Physic
0,1,2
1,3,4
2,5,6
3,7,8


In [42]:
# DataFrame.append was removed in pandas 2.0; pd.concat accepts the same
# verify_integrity flag and raises ValueError when row labels collide
# ('Ryan' is a row label in both school_B and school_C).
try:
    pd.concat([school_B, school_C], verify_integrity= True)
except ValueError:
    # Catch only the expected duplicate-label error instead of using a bare except.
    print('duplicated index')

duplicated index


# combine_first

Combine two DataFrame objects by filling null values in one DataFrame
with non-null values from other DataFrame. The row and column indexes
of the resulting DataFrame will be the union of the two.

In [47]:
# Frame of words with one missing (NaN) cell in each row.
df1 = pd.DataFrame(
    data=[
        ['I', 'Love', np.nan],
        ['Beautiful', np.nan, 'White'],
    ]
)
df1

Unnamed: 0,0,1,2
0,I,Love,
1,Beautiful,,White


In [45]:
# Frame of the same shape as df1 that supplies the fill-in values.
information = pd.DataFrame(
    data=[
        ['*', '*', 'U'],
        ['*', 'In', '*'],
    ]
)
information

Unnamed: 0,0,1,2
0,*,*,U
1,*,In,*


In [48]:
# Fill the missing cells of df1 with the values at the same positions in information.
df1.combine_first(other=information)

Unnamed: 0,0,1,2
0,I,Love,U
1,Beautiful,In,White


In [49]:
# A way to picture how combine_first works on these two same-shaped frames:
# keep df1's value where it is present, otherwise take the one from information.
np.where(df1.notna(), df1, information)

array([['I', 'Love', 'U'],
       ['Beautiful', 'In', 'White']], dtype=object)

In [51]:
# Two frames whose row labels and column labels only partly overlap.
df1 = pd.DataFrame(data={'A': [None, 0], 'B': [4, None]})
df2 = pd.DataFrame(
    data={'B': [3, 3], 'C': [1, 1]},
    index=[1, 2],
)
# The result covers the union of both frames' rows and columns.
df1.combine_first(other=df2)

Unnamed: 0,A,B,C
0,,4.0,
1,0.0,3.0,1.0
2,,3.0,1.0
