In [1]:
# Join dataframes and matrices together
# Now we'll learn about concatenating along an axis
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# First, in plain NumPy

In [3]:
# Generate the integers 0..8 and lay them out
# as a 3 x 3 matrix
arr1 = np.reshape(np.arange(9), (3, 3))

In [4]:
# Display arr1 to confirm the 3 x 3 layout
arr1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [5]:
# Join two copies of arr1 side by side: axis=1 extends the columns
np.concatenate((arr1, arr1), axis=1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

In [6]:
# The other axis option:
# axis=0 stacks the copies top to bottom (extends the rows)
np.concatenate((arr1, arr1), axis=0)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [7]:
# Now let's see how this works in pandas

In [8]:
# Build two Series whose index labels do not overlap:
#   ser1 holds three points (labels T, U, V)
#   ser2 holds two points   (labels X, Y)
ser1 = Series(data=[0, 1, 2], index=list('TUV'))

ser2 = Series(data=[3, 4], index=list('XY'))

T    0
U    1
V    2
X    3
Y    4
dtype: int64

In [21]:
# Display ser1 (the three-point Series)
ser1

T    0
U    1
V    2
dtype: int64

In [22]:
# Display ser2 (the two-point Series)
ser2

X    3
Y    4
dtype: int64

In [20]:
# Now use pd.concat; axis=0 (the default) appends one Series after the other
pd.concat([ser1, ser2], axis=0)

T    0
U    1
V    2
X    3
Y    4
dtype: int64

In [10]:
# Concatenating along the other axis (axis=1) yields a DataFrame instead of a Series
pd.concat(objs=[ser1, ser2], axis=1)

Unnamed: 0,0,1
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [11]:
# Restrict the result to specific row labels.
# The `join_axes` argument of pd.concat was deprecated in pandas 0.25 and
# removed in 1.0, so select the labels by reindexing the concatenated frame;
# this keeps rows U, V, Y in that order, with NaN where a Series has no value.
pd.concat([ser1, ser2], axis=1).reindex(['U', 'V', 'Y'])

Unnamed: 0,0,1
U,1.0,
V,2.0,
Y,,4.0


In [14]:
# Suppose we want to tag each piece of the concatenation result with a marker (a key).

# Passing `keys` does this by building a hierarchical index on the result
pd.concat(objs=[ser1, ser2], keys=['cat1', 'cat2'])

cat1  T    0
      U    1
      V    2
cat2  X    3
      Y    4
dtype: int64

In [15]:
# With axis=1 the same keys become the column headers instead
pd.concat(objs=[ser1, ser2], keys=['cat1', 'cat2'], axis=1)

Unnamed: 0,cat1,cat2
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [16]:
# Lastly, everything works similarly for DataFrames.
# Two frames of random normal values whose column sets only partly overlap.
dframe1 = DataFrame(data=np.random.randn(4, 3), columns=list('XYZ'))
dframe2 = DataFrame(data=np.random.randn(3, 3), columns=list('YQX'))

In [17]:
# Display dframe1 (4 rows; columns X, Y, Z)
dframe1

Unnamed: 0,X,Y,Z
0,-1.056573,2.490553,-1.40819
1,-1.173953,-1.506289,1.802178
2,0.539707,0.073094,-0.51241
3,-0.63573,0.650252,0.058255


In [18]:
# Display dframe2 (3 rows; columns Y, Q, X)
dframe2

Unnamed: 0,Y,Q,X
0,-0.584068,-0.580587,0.4648
1,-0.794761,0.415032,-0.482546
2,-0.232848,-0.165883,-0.414876


In [19]:
# Concatenate the two DataFrames row-wise; a column missing from one frame is filled with NaN.
# sort=True orders the non-aligned columns alphabetically (Q, X, Y, Z). Since pandas 1.0 the
# default is sort=False, so it is passed explicitly to get the same column order on every version
# that supports the argument.
pd.concat([dframe1, dframe2], sort=True)

Unnamed: 0,Q,X,Y,Z
0,,-1.056573,2.490553,-1.40819
1,,-1.173953,-1.506289,1.802178
2,,0.539707,0.073094,-0.51241
3,,-0.63573,0.650252,0.058255
0,-0.580587,0.4648,-0.584068,
1,0.415032,-0.482546,-0.794761,
2,-0.165883,-0.414876,-0.232848,


In [23]:
# If we don't care about the original index labels and just want one complete DataFrame,
# ignore_index=True renumbers the rows 0..n-1.
# sort=True keeps the alphabetical column order (Q, X, Y, Z); the default became
# sort=False in pandas 1.0, so it is stated explicitly.
pd.concat([dframe1, dframe2], ignore_index=True, sort=True)

Unnamed: 0,Q,X,Y,Z
0,,-1.056573,2.490553,-1.40819
1,,-1.173953,-1.506289,1.802178
2,,0.539707,0.073094,-0.51241
3,,-0.63573,0.650252,0.058255
4,-0.580587,0.4648,-0.584068,
5,0.415032,-0.482546,-0.794761,
6,-0.165883,-0.414876,-0.232848,


In [24]:
# For more info, see the pd.concat API reference.
# (The previous link targeted the docs page of the long-removed pandas.tools.merge module.)
url = 'https://pandas.pydata.org/docs/reference/api/pandas.concat.html'