In [1]:
# Now we'll learn about concatenating along an axis
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [5]:
# Start with plain NumPy:
# build a 3x3 matrix holding the integers 0 through 8
arr1 = np.arange(9).reshape(3, 3)

# Display the matrix
arr1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [3]:
# Join the matrix with itself along axis 1 (side by side, so the columns double)
np.concatenate((arr1, arr1), axis=1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

In [4]:
# The other option, axis 0, stacks the copies vertically (so the rows double)
np.concatenate((arr1, arr1), axis=0)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [9]:
# Now the same idea in pandas.
# Build two Series whose index labels do not overlap.
ser1 = Series(data=[0, 1, 2], index=['T', 'U', 'V'])

ser2 = Series(data=[3, 4], index=['X', 'Y'])

In [7]:
# Display the first Series (labels T, U, V)
ser1

T    0
U    1
V    2
dtype: int64

In [8]:
# Display the second Series (labels X, Y)
ser2

X    3
Y    4
dtype: int64

In [10]:
# Now let's use concat; axis=0 is the default and is spelled out here for clarity
pd.concat([ser1, ser2], axis=0)

T    0
U    1
V    2
X    3
Y    4
dtype: int64

In [13]:
# Concatenating along axis=1 instead lines the Series up as columns of a DataFrame
pd.concat([ser1, ser2], axis=1)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,0,1
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [14]:
# Passing sort=False explicitly accepts the future behaviour and silences the warning
pd.concat([ser1, ser2], sort=False, axis=1)

Unnamed: 0,0,1
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [15]:
# We can restrict the result to specific index labels.
# The `join_axes` argument was deprecated in pandas 0.25 and removed in 1.0,
# so select the wanted labels by reindexing the concatenated frame instead.
# sort=False keeps the concat itself free of the sorting FutureWarning on
# older pandas versions.
pd.concat([ser1, ser2], axis=1, sort=False).reindex(['U', 'V', 'Y'])

Unnamed: 0,0,1
U,1.0,
V,2.0,
Y,,4.0


In [16]:
# Suppose we want to tag each input with a marker in the concatenated result.

# Passing `keys` does this by building a hierarchical index (one outer level per input)
pd.concat([ser1, ser2], keys=['cat1', 'cat2'])

cat1  T    0
      U    1
      V    2
cat2  X    3
      Y    4
dtype: int64

In [17]:
# With axis=1 the same keys become the column headers instead
pd.concat([ser1, ser2], keys=['cat1', 'cat2'], axis=1)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  


Unnamed: 0,cat1,cat2
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [18]:
# Lastly, everything works similarly in DataFrames.
# Draw the sample values from a locally seeded generator so that both frames
# hold the same numbers after every kernel restart (reproducible run-all),
# without touching NumPy's global random state.
rng = np.random.RandomState(0)

# The two frames share columns X and Y; Z and Q each appear in only one of them.
dframe1 = DataFrame(rng.randn(4, 3), columns=['X', 'Y', 'Z'])
dframe2 = DataFrame(rng.randn(3, 3), columns=['Y', 'Q', 'X'])

In [20]:
# Display the first DataFrame (4 rows; columns X, Y, Z)
dframe1

Unnamed: 0,X,Y,Z
0,-0.552676,-0.627563,0.055718
1,-0.424107,2.09008,0.598019
2,-0.129408,-0.620725,0.024695
3,-0.551913,0.701128,1.107509


In [21]:
# Display the second DataFrame (3 rows; columns Y, Q, X)
dframe2

Unnamed: 0,Y,Q,X
0,0.370074,0.975276,1.386559
1,-1.549763,-1.508498,-0.071112
2,0.025711,-0.17972,-0.060477


In [24]:
# Stack the two frames row-wise; sort=True orders the combined columns alphabetically
# and columns missing from one frame are filled with NaN
pd.concat([dframe1, dframe2], sort=True)

Unnamed: 0,Q,X,Y,Z
0,,-0.552676,-0.627563,0.055718
1,,-0.424107,2.09008,0.598019
2,,-0.129408,-0.620725,0.024695
3,,-0.551913,0.701128,1.107509
0,0.975276,1.386559,0.370074,
1,-1.508498,-0.071112,-1.549763,
2,-0.17972,-0.060477,0.025711,


In [25]:
# To get a continuous index instead of repeated row labels, pass ignore_index=True
pd.concat([dframe1, dframe2], ignore_index=True, sort=True)

Unnamed: 0,Q,X,Y,Z
0,,-0.552676,-0.627563,0.055718
1,,-0.424107,2.09008,0.598019
2,,-0.129408,-0.620725,0.024695
3,,-0.551913,0.701128,1.107509
4,0.975276,1.386559,0.370074,
5,-1.508498,-0.071112,-1.549763,
6,-0.17972,-0.060477,0.025711,


In [29]:
# For more info, open the official pandas.concat API reference in the browser.
import webbrowser as wb

# Current HTTPS location of the API reference; the older
# http://.../pandas-docs/stable/generated/ path predates the docs reorganisation.
url = 'https://pandas.pydata.org/docs/reference/api/pandas.concat.html'

# open_new asks the default browser for a new window and returns True on success
wb.open_new(url)


True

In [27]:
# Next up: more on combining DataFrames with overlapping indexes!