In [1]:
# Now we'll learn about concatenating along an axis
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# First, using just NumPy

In [5]:
# Build a 3x3 matrix holding the integers 0 through 8
arr1 = np.arange(9).reshape(3, 3)

In [6]:
# Show the matrix (a bare name on the last line of a cell is displayed as its output)
arr1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [14]:
# Join two copies along axis 1: the columns are appended, so 3x3 becomes 3x6
np.concatenate((arr1, arr1), axis=1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

In [15]:
# The other axis option: along axis 0 the rows are appended, so 3x3 becomes 6x3
np.concatenate((arr1, arr1), axis=0)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [8]:
# Now let's see how this works in pandas

In [16]:
# Let's create two Series whose index labels do not overlap
ser1 = Series([0, 1, 2], index=list('TUV'))
ser2 = Series([3, 4], index=list('XY'))

# Next we'll combine them with concat (default is axis=0)


In [17]:
# Concatenating along axis=0 stacks the values end to end; the result is still a Series
pd.concat([ser1,ser2],axis=0)

T    0
U    1
V    2
X    3
Y    4
dtype: int64

In [18]:
# Passing axis=1 instead produces a DataFrame: each Series becomes a column,
# rows are aligned on the index labels, and labels missing from a Series are left empty (NaN)
pd.concat([ser1,ser2],axis=1)

Unnamed: 0,0,1
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [19]:
# Let's say we wanted to add markers/keys to the concatenation result

# We can do this with a hierarchical index: each key labels the rows from its Series
pd.concat([ser1,ser2],axis=0,keys=["cat1","cat2"])

cat1  T    0
      U    1
      V    2
cat2  X    3
      Y    4
dtype: int64

In [20]:
# Along axis=1, these keys become the column headers instead
pd.concat([ser1,ser2],axis=1,keys=["cat1","cat2"])

Unnamed: 0,cat1,cat2
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [21]:
# Lastly, everything works similarly in DataFrames

# Draw the demo values from a locally seeded generator so that a fresh
# Restart & Run All reproduces exactly the same frames every time.
# (Outputs saved from earlier, unseeded runs will differ until the cells are re-run.)
rng = np.random.default_rng(12345)
dframe1 = DataFrame(rng.standard_normal((4, 3)), columns=['X', 'Y', 'Z'])
dframe2 = DataFrame(rng.standard_normal((3, 3)), columns=['Y', 'Q', 'X'])

In [22]:
# Concat on DataFrames: the rows are stacked, the result has every column found in
# either frame, and cells for a column a frame does not have are left empty (NaN).
# Note that the original row labels are kept, so 0, 1, 2 appear twice.
pd.concat([dframe1,dframe2])

Unnamed: 0,X,Y,Z,Q
0,0.88735,1.22822,-0.745079,
1,0.307921,0.506596,0.345377,
2,-0.094188,-0.92889,1.474355,
3,-0.54752,1.636385,-0.297415,
0,0.468243,-0.004159,,-1.684121
1,1.073503,0.771871,,-1.241932
2,-1.267383,0.140434,,0.504545


In [23]:
# If we don't care about the index info and just want to make a complete DataFrame,
# just use ignore_index (the rows are then renumbered 0 through 6)
pd.concat([dframe1,dframe2],ignore_index=True)

Unnamed: 0,X,Y,Z,Q
0,0.88735,1.22822,-0.745079,
1,0.307921,0.506596,0.345377,
2,-0.094188,-0.92889,1.474355,
3,-0.54752,1.636385,-0.297415,
4,0.468243,-0.004159,,-1.684121
5,1.073503,0.771871,,-1.241932
6,-1.267383,0.140434,,0.504545


In [18]:
# For more info, see the pandas.concat API documentation.
# Uses https and the current docs location (the old 'generated/' path is a legacy URL).
url = 'https://pandas.pydata.org/docs/reference/api/pandas.concat.html'

In [None]:
#Next up: More on Combining DataFrames with Overlapping Indexes!