#### Function that creates a dataframe

In [80]:
import pandas as pd
import numpy as np

In [82]:
def create_df(cols,ind):
    data = {c: [str(c) + str(i) for i in ind]
            for c in cols}
    return pd.DataFrame(data,ind)
create_df('XYZ',range(4))

Unnamed: 0,X,Y,Z
0,X0,Y0,Z0
1,X1,Y1,Z1
2,X2,Y2,Z2
3,X3,Y3,Z3


### Concatination of numpy arrays:

In [85]:
x = [1,2,3]
y = [4,5,6]
z = [7,8,9]
df = np.concatenate([x,y,z])
df

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [87]:
a = [[1,2],
     [3,4]]
df = np.concatenate([a,a],axis=0)
df

array([[1, 2],
       [3, 4],
       [1, 2],
       [3, 4]])

## simple concatenation with `pd.concat`

### syntax
 pd.concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False, keys=None, levels=None, names=None, verify_integrity=False  copy=True)

In [214]:
s1 = pd.Series(['Anu','Adith','Hari'])
s2 = pd.Series(['Alan','Mincy','Sonu'])
df = pd.concat([s1,s2],axis=1)
df

Unnamed: 0,0,1
0,Anu,Alan
1,Adith,Mincy
2,Hari,Sonu


In [93]:
df1 = create_df('AB', [1, 2])
df2 = create_df('AB', [3, 4])
display('df1', 'df2', 'pd.concat([df1, df2])')
df1


'df1'

'df2'

'pd.concat([df1, df2])'

Unnamed: 0,A,B
1,A1,B1
2,A2,B2


In [95]:
df2

Unnamed: 0,A,B
3,A3,B3
4,A4,B4


In [97]:
pd.concat([df1, df2])

Unnamed: 0,A,B
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4


In [100]:
df3 = create_df('AB', [0, 1])
df4 = create_df('CD', [0, 1])
display('df3', 'df4', "pd.concat([df3, df4], axis='col')")

'df3'

'df4'

"pd.concat([df3, df4], axis='col')"

In [102]:
df3

Unnamed: 0,A,B
0,A0,B0
1,A1,B1


In [104]:
df4

Unnamed: 0,C,D
0,C0,D0
1,C1,D1


In [106]:
pd.concat([df3, df4], axis=1)

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1


### Duplicate indices

There is a difference between `np.concatenate` and `pd.concat` ,in pd.concat the indices are preserved even if the results will have duplicate indices.

In [110]:
x = create_df('AB',[0,1])
y = create_df('AB',[2,3])
y.index = x.index # create duplicate index
x

Unnamed: 0,A,B
0,A0,B0
1,A1,B1


In [112]:
y

Unnamed: 0,A,B
0,A2,B2
1,A3,B3


In [114]:
pd.concat([x,y])

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
0,A2,B2
1,A3,B3


#### Catching the repeats as an error

In [117]:
try:
    pd.concat([x, y], verify_integrity=True)
except ValueError as e:
    print("ValueError:", e)
else:
    print(pd.concat([x,y]))

ValueError: Indexes have overlapping values: Index([0, 1], dtype='int64')


#### Ignoring the index

In [120]:
pd.concat([x,y],ignore_index=True)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


#### Adding MultiIndex keys

In [166]:
pd.concat([x,y],keys=['x','y'])

Unnamed: 0,Unnamed: 1,A,B
x,0,A0,B0
x,1,A1,B1
y,0,A2,B2
y,1,A3,B3


In [125]:
df5 = create_df('ABC', [1, 2])
df6 = create_df('BCD', [3, 4])
display('df5', 'df6', 'pd.concat([df5, df6])')
df5

'df5'

'df6'

'pd.concat([df5, df6])'

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2


In [127]:
df6

Unnamed: 0,B,C,D
3,B3,C3,D3
4,B4,C4,D4


In [129]:
pd.concat([df5, df6])

Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


In [131]:
pd.concat([df5,df6],join='inner')

Unnamed: 0,B,C
1,B1,C1
2,B2,C2
3,B3,C3
4,B4,C4


In [133]:
pd.concat([df5,df6],join='outer')

Unnamed: 0,A,B,C,D
1,A1,B1,C1,
2,A2,B2,C2,
3,,B3,C3,D3
4,,B4,C4,D4


In [135]:
pd.concat([df5, df6], join_axes=[df5.columns])

TypeError: concat() got an unexpected keyword argument 'join_axes'

In [137]:
display('df5', 'df6',
        "pd.concat([df5, df6], join_axes=[df5.columns])")

'df5'

'df6'

'pd.concat([df5, df6], join_axes=[df5.columns])'

In [139]:
df5

Unnamed: 0,A,B,C
1,A1,B1,C1
2,A2,B2,C2


In [141]:
df6

Unnamed: 0,B,C,D
3,B3,C3,D3
4,B4,C4,D4


In [143]:
pd.concat([df5, df6], join_axes=[df5.columns])

TypeError: concat() got an unexpected keyword argument 'join_axes'

In [197]:
df1.append(df2)

AttributeError: 'DataFrame' object has no attribute 'append'

#### Concatenating a Dataframewith a series

In [207]:
import pandas as pd
df = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], 
                   'B': ['B0', 'B1', 'B2', 'B3']})
series = pd.Series([1, 2, 3, 4])
pd.concat([df, series],axis = 1)

Unnamed: 0,A,B,0
0,A0,B0,1
1,A1,B1,2
2,A2,B2,3
3,A3,B3,4


In [205]:
df1

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


In [209]:
series

0    1
1    2
2    3
3    4
dtype: int64

#### Concatenate Using Pandas with Examples
( Geeks for Geeks)

* Example 1: Concatenate DataFrames in Python

In [168]:
import pandas as pd

# creating the Series
series1 = pd.Series([1, 2, 3])
display('series1:', series1)
series2 = pd.Series(['A', 'B', 'C'])
display('series2:', series2)

# concatenating
display('After concatenating:')
display(pd.concat([series1, series2]))


'series1:'

0    1
1    2
2    3
dtype: int64

'series2:'

0    A
1    B
2    C
dtype: object

'After concatenating:'

0    1
1    2
2    3
0    A
1    B
2    C
dtype: object

* Example 2: Pandas combining two dataframes horizontally with index = 1

In [173]:
# importing the module
import pandas as pd

# creating the Series
series1 = pd.Series([1, 2, 3])
display('series1:', series1)
series2 = pd.Series(['A', 'B', 'C'])
display('series2:', series2)


display('After concatenating:')
display(pd.concat([series1, series2], axis = 1))


'series1:'

0    1
1    2
2    3
dtype: int64

'series2:'

0    A
1    B
2    C
dtype: object

'After concatenating:'

Unnamed: 0,0,1
0,1,A
1,2,B
2,3,C


* Example 3: Concatenating 2 DataFrames and Assigning Keys

In [177]:
import pandas as pd

# creating the DataFrames
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
					'B': ['B0', 'B1', 'B2', 'B3']})
display('df1:', df1)
df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
					'B': ['B4', 'B5', 'B6', 'B7']})
display('df2:', df2)


display('After concatenating:')
display(pd.concat([df1, df2],keys=['key1', 'key2']))


'df1:'

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


'df2:'

Unnamed: 0,A,B
0,A4,B4
1,A5,B5
2,A6,B6
3,A7,B7


'After concatenating:'

Unnamed: 0,Unnamed: 1,A,B
key1,0,A0,B0
key1,1,A1,B1
key1,2,A2,B2
key1,3,A3,B3
key2,0,A4,B4
key2,1,A5,B5
key2,2,A6,B6
key2,3,A7,B7


* Example 4: Concatenating DataFrames horizontally in Pandas with axis = 1

In [193]:
import pandas as pd

# creating the DataFrames
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], 
					'B': ['B0', 'B1', 'B2', 'B3']})
display('df1:', df1)
df2 = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'], 
					'D': ['D0', 'D1', 'D2', 'D3']})
display('df2:', df2)


display('After concatenating:')
display(pd.concat([df1, df2],axis = 1))


'df1:'

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


'df2:'

Unnamed: 0,C,D
0,C0,D0
1,C1,D1
2,C2,D2
3,C3,D3


'After concatenating:'

Unnamed: 0,A,B,C,D
0,A0,B0,C0,D0
1,A1,B1,C1,D1
2,A2,B2,C2,D2
3,A3,B3,C3,D3


* Example 5: Concatenating 2 DataFrames with ignore_index = True

In [184]:
import pandas as pd

# creating the DataFrames
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], 
					'B': ['B0', 'B1', 'B2', 'B3']})
display('df1:', df1)
df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'], 
					'B': ['B4', 'B5', 'B6', 'B7']})
display('df2:', df2)


display('After concatenating:')
display(pd.concat([df1, df2], ignore_index = True))


'df1:'

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


'df2:'

Unnamed: 0,A,B
0,A4,B4
1,A5,B5
2,A6,B6
3,A7,B7


'After concatenating:'

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3
4,A4,B4
5,A5,B5
6,A6,B6
7,A7,B7


* Example 6: Concatenating a DataFrame with a Series

In [186]:
import pandas as pd

# creating the DataFrame
df = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'], 
					'B': ['B0', 'B1', 'B2', 'B3']})
display('df:', df1)
# creating the Series
series = pd.Series([1, 2, 3, 4])
display('series:', series)

# concatenating
display('After concatenating:')
display(pd.concat([df, series],axis = 1))


'df:'

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3


'series:'

0    1
1    2
2    3
3    4
dtype: int64

'After concatenating:'

Unnamed: 0,A,B,0
0,A0,B0,1
1,A1,B1,2
2,A2,B2,3
3,A3,B3,4
