# **Concat** in Pandas

`pd.concat` joins two or more pandas objects along a chosen axis: rows (`axis=0`, the default) or columns (`axis=1`). The primary objects it operates on are DataFrames and Series.

In [None]:
import pandas as pd

# Two small sample frames covering the same five dates:
# one holds the daily maximum temperature, the other the daily rainfall.
temp_data = pd.DataFrame(dict(
    Date=['12-02-2023', '13-02-2023', '14-02-2023', '15-02-2023', '16-02-2023'],
    TempMax=[24.3, 26.9, 23.4, 15.5, 16.1],
))

rainfall_data = pd.DataFrame(dict(
    Date=['12-02-2023', '13-02-2023', '14-02-2023', '15-02-2023', '16-02-2023'],
    Rainfall=[0, 3.6, 3.6, 39.8, 2.8],
))


In [None]:

# Stack the two frames on top of each other. axis='index' is the same as
# axis=0, which is what pd.concat uses when no axis is given.
# Columns that exist in only one frame are filled with NaN in the other's rows.
result_col = pd.concat([temp_data, rainfall_data], axis='index')
print("\nConcatenate along rows:", result_col, sep="\n")



Concatenate along rows:
         Date  TempMax  Rainfall
0  12-02-2023     24.3       NaN
1  13-02-2023     26.9       NaN
2  14-02-2023     23.4       NaN
3  15-02-2023     15.5       NaN
4  16-02-2023     16.1       NaN
0  12-02-2023      NaN       0.0
1  13-02-2023      NaN       3.6
2  14-02-2023      NaN       3.6
3  15-02-2023      NaN      39.8
4  16-02-2023      NaN       2.8


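When concatenating along columns (`axis=1`), are the rows matched on the values of a common column such as `Date`? Let us see — in the output below, check whether the two `Date` columns agree on each row or whether the rows were simply paired by index label.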

In [None]:

# Rebuild the sample frames so their Date values only partly overlap
# (14-02 to 16-02 appear in both); both still use the default 0-4 row index.
temp_data = pd.DataFrame(dict(
    Date=['12-02-2023', '13-02-2023', '14-02-2023', '15-02-2023', '16-02-2023'],
    TempMax=[24.3, 26.9, 23.4, 15.5, 16.1],
))

rainfall_data = pd.DataFrame(dict(
    Date=['14-02-2023', '15-02-2023', '16-02-2023', '17-02-2023', '18-02-2023'],
    Rainfall=[0, 3.6, 3.6, 39.8, 2.8],
))

# axis='columns' (same as axis=1) places the frames side by side.
# Rows are paired by index label, not by the values in the Date column.
result_col = pd.concat([temp_data, rainfall_data], axis='columns')
print("\nConcatenate along columns:", result_col, sep="\n")



Concatenate along columns:
         Date  TempMax        Date  Rainfall
0  12-02-2023     24.3  14-02-2023       0.0
1  13-02-2023     26.9  15-02-2023       3.6
2  14-02-2023     23.4  16-02-2023       3.6
3  15-02-2023     15.5  17-02-2023      39.8
4  16-02-2023     16.1  18-02-2023       2.8


### ignore_index

In pandas.concat, the ignore_index parameter is a boolean value that determines whether to ignore the index labels along the concatenation axis or not.

When ignore_index is set to True, the resulting concatenated DataFrame will have a new RangeIndex along the concatenation axis, effectively ignoring the original index labels of the input DataFrames.

In [None]:
# Concatenate along rows (axis=0) and keep each frame's own index labels
# (ignore_index=False is the default), so the labels 0-4 appear twice.
result_col_notIgnore = pd.concat([temp_data, rainfall_data], axis=0, ignore_index=False)
print("\nConcatenate along rows with ignore_index = False:")
print(result_col_notIgnore)

# Concatenate along rows (axis=0) and discard the original index labels
# (ignore_index=True), so the result is renumbered 0-9.
result_col_ignore = pd.concat([temp_data, rainfall_data], axis=0, ignore_index=True)
print("\nConcatenate along rows with ignore_index = True:")
print(result_col_ignore)
# Compare the index (left-most column) of result_col_notIgnore and
# result_col_ignore: the data is identical, only the row labels differ.


Concatenate along columns with ignore_index = False:
         Date  TempMax  Rainfall
0  12-02-2023     24.3       NaN
1  13-02-2023     26.9       NaN
2  14-02-2023     23.4       NaN
3  15-02-2023     15.5       NaN
4  16-02-2023     16.1       NaN
0  14-02-2023      NaN       0.0
1  15-02-2023      NaN       3.6
2  16-02-2023      NaN       3.6
3  17-02-2023      NaN      39.8
4  18-02-2023      NaN       2.8

Concatenate along columns with ignore_index = True:
         Date  TempMax  Rainfall
0  12-02-2023     24.3       NaN
1  13-02-2023     26.9       NaN
2  14-02-2023     23.4       NaN
3  15-02-2023     15.5       NaN
4  16-02-2023     16.1       NaN
5  14-02-2023      NaN       0.0
6  15-02-2023      NaN       3.6
7  16-02-2023      NaN       3.6
8  17-02-2023      NaN      39.8
9  18-02-2023      NaN       2.8


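### join

The `join` parameter controls how labels on the *other* axis are combined: `'outer'` (the default) keeps the union of labels and fills the gaps with `NaN`, while `'inner'` keeps only the labels present in every input.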

In [None]:

# join='inner': stack the rows but keep only the columns that both frames share.
result_col_inner = pd.concat([temp_data, rainfall_data], axis='index', join='inner')
print("\nConcatenate with inner join:", result_col_inner, sep="\n")

# join='outer': stack the rows and keep every column from either frame;
# cells with no source value become NaN.
result_col_outer = pd.concat([temp_data, rainfall_data], axis='index', join='outer')
print("\nConcatenate with outer join:", result_col_outer, sep="\n")



Concatenate with inner join:
         Date
0  12-02-2023
1  13-02-2023
2  14-02-2023
3  15-02-2023
4  16-02-2023
0  12-02-2023
1  13-02-2023
2  14-02-2023
3  15-02-2023
4  16-02-2023

Concatenate with outer join:
         Date  TempMax  Rainfall
0  12-02-2023     24.3       NaN
1  13-02-2023     26.9       NaN
2  14-02-2023     23.4       NaN
3  15-02-2023     15.5       NaN
4  16-02-2023     16.1       NaN
0  12-02-2023      NaN       0.0
1  13-02-2023      NaN       3.6
2  14-02-2023      NaN       3.6
3  15-02-2023      NaN      39.8
4  16-02-2023      NaN       2.8


In [None]:
# First frame: three people with the columns Name, Age and Score.
data1 = dict(
    Name=['Alice', 'Bob', 'Charlie'],
    Age=[25, 30, 35],
    Score=[85, 90, 88],
)
df1 = pd.DataFrame.from_dict(data1)

# Second frame: same three columns plus an additional "extra" column.
data2 = dict(
    Name=['David', 'Eve', 'Charlie'],
    Age=[27, 32, 35],
    Score=[82, 88, 88],
    extra=[100, 100, 100],
)
df2 = pd.DataFrame.from_dict(data2)

# Show both frames, each under its own heading.
print("DataFrame 1:", df1, "\nDataFrame 2:", df2, sep="\n")


DataFrame 1:
      Name  Age  Score
0    Alice   25     85
1      Bob   30     90
2  Charlie   35     88

DataFrame 2:
      Name  Age  Score  extra
0    David   27     82    100
1      Eve   32     88    100
2  Charlie   35     88    100


In [None]:
# Stack df1 on top of df2, keep only the columns they have in common
# (df2's "extra" column is dropped), and renumber the rows from 0.
result_col_inner2 = pd.concat(
    objs=[df1, df2],
    axis='index',
    join='inner',
    ignore_index=True,
)
result_col_inner2

Unnamed: 0,Name,Age,Score
0,Alice,25,85
1,Bob,30,90
2,Charlie,35,88
3,David,27,82
4,Eve,32,88
5,Charlie,35,88


In [None]:
# Place df1 and df2 side by side. Because the concatenation axis is now the
# columns, ignore_index=True replaces the column names with 0, 1, 2, ...
result_col_inner2_1 = pd.concat(
    objs=[df1, df2],
    axis='columns',
    ignore_index=True,
)
result_col_inner2_1

Unnamed: 0,0,1,2,3,4,5,6
0,Alice,25,85,David,27,82,100
1,Bob,30,90,Eve,32,88,100
2,Charlie,35,88,Charlie,35,88,100


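### keys

The `keys` parameter labels each input object: the given keys become the outermost level of a hierarchical (MultiIndex) index along the concatenation axis, so each piece can be selected again by its key.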

In [None]:

# Stack the two frames along rows (axis=0) and tag each one with a key.
# The keys become the outer level of the row index, so the rows that came
# from temp_data sit under 'Delhi' and those from rainfall_data under 'Chennai'.
result_col_key1 = pd.concat([temp_data, rainfall_data], axis=0, keys=('Delhi', 'Chennai'))
print("\nConcatenate along rows with keys, axis = 0:")
print(result_col_key1)





Concatenate along columns with keys, axis = 0:
                 Date  TempMax  Rainfall
Delhi   0  12-02-2023     24.3       NaN
        1  13-02-2023     26.9       NaN
        2  14-02-2023     23.4       NaN
        3  15-02-2023     15.5       NaN
        4  16-02-2023     16.1       NaN
Chennai 0  12-02-2023      NaN       0.0
        1  13-02-2023      NaN       3.6
        2  14-02-2023      NaN       3.6
        3  15-02-2023      NaN      39.8
        4  16-02-2023      NaN       2.8

Concatenate along columns with keys, axis = 1:
            A                   B         
         Date TempMax        Date Rainfall
0  12-02-2023    24.3  12-02-2023      0.0
1  13-02-2023    26.9  13-02-2023      3.6
2  14-02-2023    23.4  14-02-2023      3.6
3  15-02-2023    15.5  15-02-2023     39.8
4  16-02-2023    16.1  16-02-2023      2.8


In [None]:
# Select every row stored under the outer index key "Delhi".
result_col_key1.loc["Delhi"]

Unnamed: 0,Date,TempMax,Rainfall
0,12-02-2023,24.3,
1,13-02-2023,26.9,
2,14-02-2023,23.4,
3,15-02-2023,15.5,
4,16-02-2023,16.1,


In [None]:
# Fetch a single value with one .loc call instead of chained indexing:
# the row is addressed by the (outer key, inner label) tuple ("Delhi", 4)
# and the column by its name.
result_col_key1.loc[("Delhi", 4), "TempMax"]

16.1

In [None]:
# Place the frames side by side and label them 'A' and 'B'; with axis=1 the
# keys become the top level of the column index.
result_col_key2 = pd.concat(
    [temp_data, rainfall_data],
    axis='columns',
    keys=['A', 'B'],
)
print("\nConcatenate along columns with keys, axis = 1:")
result_col_key2


Concatenate along columns with keys, axis = 1:


Unnamed: 0_level_0,A,A,B,B
Unnamed: 0_level_1,Date,TempMax,Date,Rainfall
0,12-02-2023,24.3,12-02-2023,0.0
1,13-02-2023,26.9,13-02-2023,3.6
2,14-02-2023,23.4,14-02-2023,3.6
3,15-02-2023,15.5,15-02-2023,39.8
4,16-02-2023,16.1,16-02-2023,2.8


In [None]:
# Address the column by its full (key, name) pair and take the largest value.
max_temp_A = result_col_key2[('A', 'TempMax')].max()
print("Maximum temperature in section A:", max_temp_A)

Maximum temperature in section A: 26.9


In [None]:
# Same side-by-side concatenation for df1 and df2: df1's columns are grouped
# under 'A' and df2's columns under 'B'.
result_col_key3 = pd.concat(
    [df1, df2],
    axis='columns',
    keys=['A', 'B'],
)
print("\nConcatenate along columns with keys, axis = 1:")
result_col_key3


Concatenate along columns with keys, axis = 1:


Unnamed: 0_level_0,A,A,A,B,B,B,B
Unnamed: 0_level_1,Name,Age,Score,Name,Age,Score,extra
0,Alice,25,85,David,27,82,100
1,Bob,30,90,Eve,32,88,100
2,Charlie,35,88,Charlie,35,88,100


In [None]:
# Smallest Score among the columns grouped under key 'A' of result_col_key3.
# The label now says what is computed: a minimum score, not a maximum temperature.
min_score_A3 = result_col_key3['A']['Score'].min()
print("Minimum score in section A:", min_score_A3)

Maximum temperature in section A: 85


In [None]:
# Smallest Score among the columns grouped under key 'B' of result_col_key3.
# The label now says what is computed: a minimum score, not a maximum temperature.
min_score_B3 = result_col_key3['B']['Score'].min()
print("Minimum score in section B:", min_score_B3)

Maximum temperature in section B: 82
