In [4]:
import numpy as np
import pandas as pd

# Concatenation of series: pd.concat joins the inputs end to end and carries
# every original index label over to the result.
ser1 = pd.Series(list('ABC'), index=[1, 2, 3])
ser2 = pd.Series(list('DEF'), index=[4, 5, 6])

pd.concat(objs=[ser1, ser2])

1    A
2    B
3    C
4    D
5    E
6    F
dtype: object

In [39]:
# Concatenation of data frames
def make_df(cols, ind):
    """Build a small demo DataFrame whose cells name their own position.

    Every cell holds the string form of its column label followed by the
    string form of its row label (for example ``'A1'``).

    Parameters
    ----------
    cols : iterable
        Column labels; iterating a string such as ``'AB'`` yields one
        column per character.
    ind : iterable
        Row labels, used both to build the cell values and as the index.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``cols`` and one row per entry of ``ind``.
    """
    columns = {}
    for label in cols:
        columns[label] = [str(label) + str(row) for row in ind]
    return pd.DataFrame(columns, index=ind)

# Two frames with the same columns but different row labels.
df1, df2 = make_df('AB', [1, 2]), make_df('AB', [3, 4])
print(df1, df2, sep="\n")
print("Row-wise concatenation:")
# axis=0 is the default for pd.concat, so the rows of df2 are placed below df1.
print(pd.concat([df1, df2], axis=0))

# Two frames that share the same row labels.
df3, df4 = make_df('AB', [1, 2]), make_df('AB', [1, 2])
print("Column-wise concatenation:")
# axis='columns' is the named form of axis=1: the frames are placed side by
# side, aligned on their row labels.
print(pd.concat([df3, df4], axis='columns'))

# One important difference between np.concatenate and pd.concat is that pandas
# concatenation preserves indices, even if the result will have duplicate indices.
# The two frames below start out with different row labels (0, 1 and 2, 3), so
# the first concatenation has no repeated labels.
df5 = make_df('AB', [0, 1])
df6 = make_df('AB', [2, 3])
print("Original data frames:")
print("df5:")
print(df5)
print("df6:")
print(df6)
print("Concatenate original data frames:")
print(pd.concat([df5, df6]))

# Make duplicate indices: give df6 the same row labels as df5.
# This assignment changes df6 itself, so every statement that runs after this
# point sees df6 with the row labels 0 and 1.
df6.index = df5.index
print("Data frames after making duplicate indices:")
print("df5:")
print(df5)
print("df6:")
print(df6)
print("Concatenate data frames after duplicating indices:")
# Notice the repeated indices in the result. While this is valid within
# DataFrames, the outcome is often undesirable. pd.concat() gives us a few
# ways to handle it.
print(pd.concat([df5, df6]))

    A   B
1  A1  B1
2  A2  B2
    A   B
3  A3  B3
4  A4  B4
Row-wise concatenation:
    A   B
1  A1  B1
2  A2  B2
3  A3  B3
4  A4  B4
Column-wise concatenation:
    A   B   A   B
1  A1  B1  A1  B1
2  A2  B2  A2  B2
Original data frames:
df5:
    A   B
0  A0  B0
1  A1  B1
df6:
    A   B
2  A2  B2
3  A3  B3
Concatenate original data frames:
    A   B
0  A0  B0
1  A1  B1
2  A2  B2
3  A3  B3
Data frames after making duplicate indices:
df5:
    A   B
0  A0  B0
1  A1  B1
df6:
    A   B
0  A2  B2
1  A3  B3
Concatenate data frames after duplicating indices:
    A   B
0  A0  B0
1  A1  B1
0  A2  B2
1  A3  B3


In [40]:
# Catching the duplicate indices as an error: with verify_integrity=True,
# pd.concat raises ValueError when the concatenated index contains repeats.
try:
    pd.concat(objs=[df5, df6], verify_integrity=True)
except ValueError as err:
    print("Value Error due to duplicate indices: ", err)

Value Error due to duplicate indices:  Indexes have overlapping values: Int64Index([0, 1], dtype='int64')


In [43]:
# Ignoring the indices: ignore_index=True discards the original row labels
# and numbers the concatenated rows from 0.
print(pd.concat(objs=[df5, df6], ignore_index=True))

    A   B
0  A0  B0
1  A1  B1
2  A2  B2
3  A3  B3


In [44]:
# Adding a multi-level index: each entry of `keys` becomes the outer index
# label for the rows that came from the corresponding input frame.
print(pd.concat(objs=[df5, df6], keys=['x', 'y']))

      A   B
x 0  A0  B0
  1  A1  B1
y 0  A2  B2
  1  A3  B3


In [49]:
# Frames whose columns only partly overlap: df7 has A, B, C and df8 has B, C, D.
df7 = make_df('ABC', [1, 2])
df8 = make_df('BCD', [3, 4])
print(df7)
print(df8)
print("Outer Join(default):")
# join='outer' (the default) keeps the union of the columns; a cell with no
# value in its source frame is filled with NaN.
# sort=False is passed explicitly because the columns of df7 and df8 are not
# aligned: relying on the implicit default makes some pandas versions emit a
# FutureWarning about the changing sort behaviour. The column order here
# (A, B, C, D) is the same either way.
print(pd.concat([df7, df8], sort=False))

print(df5)
print(df6)
print("Inner Join:")
# join='inner' keeps only the columns that are present in every input.
# NOTE: df5 and df6 were both built with the columns 'AB', so nothing is
# dropped here and the result has the same columns as an outer join would.
# Frames with differing columns (such as df7 and df8) are needed to see
# columns being removed.
print(pd.concat([df5, df6], join='inner'))

    A   B   C
1  A1  B1  C1
2  A2  B2  C2
    B   C   D
3  B3  C3  D3
4  B4  C4  D4
Outer Join(default):
     A   B   C    D
1   A1  B1  C1  NaN
2   A2  B2  C2  NaN
3  NaN  B3  C3   D3
4  NaN  B4  C4   D4
    A   B
0  A0  B0
1  A1  B1
    A   B
0  A2  B2
1  A3  B3
Inner Join:
    A   B
0  A0  B0
1  A1  B1
0  A2  B2
1  A3  B3


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """
