# Concatenating Along an Axis

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a 3x4 integer grid holding 0..11 in row-major order.
array = np.reshape(np.arange(12), (3, 4))

In [3]:
# Show the base array: 3 rows by 4 columns.
array

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [4]:
# axis=1 joins the arrays side by side: the row count stays 3, the columns double to 8.
np.concatenate((array, array), axis=1)

array([[ 0,  1,  2,  3,  0,  1,  2,  3],
       [ 4,  5,  6,  7,  4,  5,  6,  7],
       [ 8,  9, 10, 11,  8,  9, 10, 11]])

## 1. Series

In [5]:
# Three integer Series whose index labels do not overlap ('a'-'b', 'c'-'e', 'f'-'g').
series_1 = pd.Series(data=[0, 1], index=list('ab'))
series_2 = pd.Series(data=[2, 3, 4], index=list('cde'))
series_3 = pd.Series(data=[5, 6], index=list('fg'))

In [6]:
# With the default axis=0 the Series are stacked end to end into one longer Series.
pd.concat([series_1, series_2, series_3])

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [7]:
# axis=1: each Series becomes its own column; the index is the union of all
# labels, and positions a Series does not cover are filled with NaN.
pd.concat([series_1, series_2, series_3], axis=1)

Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


In [8]:
# series_4 reuses the 'a' and 'b' labels of series_1, so the two overlap in the examples that follow.
series_4 = pd.concat([series_1, series_3])
series_4

a    0
b    1
f    5
g    6
dtype: int64

In [9]:
# pd.concat defaults to join='outer' (the parameter is `join`, not `how`):
# the result index is the union of the input indexes.
pd.concat([series_1, series_4], axis=1)

Unnamed: 0,0,1
a,0.0,0
b,1.0,1
f,,5
g,,6


In [10]:
# join='inner' keeps only the labels present in every input (here 'a' and 'b').
pd.concat([series_1, series_4], axis=1, join='inner')

Unnamed: 0,0,1
a,0,0
b,1,1


In [11]:
# Tag each concatenated Series with a key; the keys form the outer level
# of a hierarchical index on the result.
result = pd.concat(
    [series_1, series_1, series_3],
    keys=['one', 'two', 'three'],
)
result

one    a    0
       b    1
two    a    0
       b    1
three  f    5
       g    6
dtype: int64

In [12]:
# Pivot the inner index level into columns: one row per key, NaN where a key has no such label.
result.unstack()

Unnamed: 0,a,b,f,g
one,0.0,1.0,,
two,0.0,1.0,,
three,,,5.0,6.0


In [13]:
# With axis=1 the keys become the column names of the resulting DataFrame.
pd.concat(
    [series_1, series_2, series_3],
    axis=1,
    keys=['one', 'two', 'three'],
)

Unnamed: 0,one,two,three
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0


## 2. DataFrame

In [14]:
# 3x2 frame holding 0..5, rows labelled 'a'-'c'.
frame_1 = pd.DataFrame(
    data=np.arange(6).reshape(3, 2),
    index=['a', 'b', 'c'],
    columns=['one', 'two'],
)

In [15]:
# 2x2 frame holding 5..8; it has rows 'a' and 'c' only (no 'b').
frame_2 = pd.DataFrame(
    data=np.arange(5, 9).reshape(2, 2),
    index=['a', 'c'],
    columns=['three', 'four'],
)

In [16]:
# Show frame_1: rows 'a', 'b', 'c' and columns 'one', 'two'.
frame_1

Unnamed: 0,one,two
a,0,1
b,2,3
c,4,5


In [17]:
# Show frame_2: rows 'a' and 'c' only, columns 'three', 'four'.
frame_2

Unnamed: 0,three,four
a,5,6
c,7,8


In [18]:
# Along axis=1 the keys become the top level of hierarchical columns; row 'b',
# absent from frame_2, gets NaN under 'level_2'.
pd.concat([frame_1, frame_2], axis=1, keys=['level_1', 'level_2'])

Unnamed: 0_level_0,level_1,level_1,level_2,level_2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


In [19]:
# `names` labels the two column-index levels created by `keys`.
pd.concat(
    [frame_1, frame_2],
    axis=1,
    keys=['level_1', 'level_2'],
    names=['upper', 'lower'],
)

upper,level_1,level_1,level_2,level_2
lower,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


### Vertical concat (rows)

In [20]:
# Draw from a seeded local generator so the random frames (and every output
# derived from them) are identical on each Restart & Run All; unseeded draws
# change on every run. Re-run the cells below to refresh their stored outputs.
SEED = 42
rng = np.random.default_rng(SEED)
# frame_2 shares columns 'a', 'b', 'd' with frame_1 (in a different order) and has no 'c'.
frame_1 = pd.DataFrame(rng.standard_normal((3, 4)), columns=['a', 'b', 'c', 'd'])
frame_2 = pd.DataFrame(rng.standard_normal((2, 3)), columns=['b', 'd', 'a'])

In [21]:
# Show the random 3x4 frame: columns 'a'-'d', default integer row labels.
frame_1

Unnamed: 0,a,b,c,d
0,1.951301,1.2591,0.337658,-0.015202
1,0.612649,-0.117599,0.36009,-0.993087
2,-0.907369,0.662312,1.02624,1.108294


In [22]:
# Show the random 2x3 frame: columns 'b', 'd', 'a' (no 'c', different order).
frame_2

Unnamed: 0,b,d,a
0,1.197293,1.305714,0.115673
1,1.368362,-0.822852,1.232795


In [23]:
# ignore_index=True discards the original row labels and renumbers the rows 0..n-1;
# columns are aligned by name, so frame_2's missing 'c' becomes NaN.
pd.concat([frame_1, frame_2], ignore_index=True)

Unnamed: 0,a,b,c,d
0,1.951301,1.2591,0.337658,-0.015202
1,0.612649,-0.117599,0.36009,-0.993087
2,-0.907369,0.662312,1.02624,1.108294
3,0.115673,1.197293,,1.305714
4,1.232795,1.368362,,-0.822852
