In [1]:
import pandas as pd
import numpy as np

# Enable the pandas `mode.copy_on_write` option globally, before any data is created.
pd.set_option("mode.copy_on_write", True)

# `pd.concat`

- Use `axis=1` for horizontal (column-wise) concatenation; rows are aligned on their index labels

# Merging On Index

## Series

In [2]:
# Two small Series whose indices partly overlap: 'b' and 'c' appear in both,
# 'a' only in the first and 'd' only in the second
series1 = pd.Series({'a': 10, 'b': 20, 'c': 30})
series2 = pd.Series({'b': 40, 'c': 50, 'd': 60})

In [3]:
# Side-by-side concatenation: axis=1 places each Series in its own column.
# The default join is "outer", so a label missing from one Series shows up as NaN in that column.
merged = pd.concat(objs=[series1, series2], axis=1)
merged

Unnamed: 0,0,1
a,10.0,
b,20.0,40.0
c,30.0,50.0
d,,60.0


In [4]:
# Spelling out join='outer' matches the default: every index label from either Series is kept
outer_merge = pd.concat(objs=[series1, series2], join='outer', axis=1)
outer_merge

Unnamed: 0,0,1
a,10.0,
b,20.0,40.0
c,30.0,50.0
d,,60.0


In [5]:
# join='inner' retains only the index labels that are present in both Series
inner_merge = pd.concat(objs=[series1, series2], join='inner', axis=1)
inner_merge

Unnamed: 0,0,1
b,20,40
c,30,50


In [6]:
# The keys parameter labels each resulting column, so you can tell which Series every value came from
merged = pd.concat(objs=[series1, series2], keys=['Store A', 'Store B'], axis=1)
merged

Unnamed: 0,Store A,Store B
a,10.0,
b,20.0,40.0
c,30.0,50.0
d,,60.0


## DataFrame and Series

In [9]:

# Build a small DataFrame and two Series that all share the index labels x, y, z
df = pd.DataFrame(
    data={'A': [1, 2, 3], 'B': [4, 5, 6]},
    index=['x', 'y', 'z'],
)

# Named Series: its name ('C') becomes the label of the appended column
series_named = pd.Series(data=[7, 8, 9], index=['x', 'y', 'z'], name='C')
result1 = pd.concat(objs=[df, series_named], axis=1)

# Unnamed Series: with no name available, the appended column gets the integer label 0
series_unnamed = pd.Series(data=[7, 8, 9], index=['x', 'y', 'z'])
result2 = pd.concat(objs=[df, series_unnamed], axis=1)

# One print call per report: sep="\n" puts each piece on its own line, and the "\n"
# argument plus print's own trailing newline leaves two blank lines after the frame
print("Concatenating named Series as a new column:", result1, "\n", sep="\n")
print("Concatenating unnamed Series as a new column:", result2, "\n", sep="\n")


Concatenating named Series as a new column:
   A  B  C
x  1  4  7
y  2  5  8
z  3  6  9


Concatenating unnamed Series as a new column:
   A  B  0
x  1  4  7
y  2  5  8
z  3  6  9


