# How does concatenation in DataFrames work?


In [3]:
# Imports

In [5]:
import pandas as pd
import numpy as np

In [8]:
# Test dataframe 1: one record per Indian city (city, temperature, humidity).
india_weather = pd.DataFrame(
    data=[
        {'city': 'mumbai', 'temprature': 32, 'humdity': 80},
        {'city': 'delhi', 'temprature': 45, 'humdity': 60},
        {'city': 'banglore', 'temprature': 30, 'humdity': 78},
    ]
)

# Bare last expression so the notebook renders the frame.
india_weather

Unnamed: 0,city,temprature,humdity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78


In [10]:
# Test dataframe 2: one record per US city, with the same columns as the
# first test frame so the two can be stacked.
us_weather = pd.DataFrame(
    data=[
        {'city': 'new york', 'temprature': 21, 'humdity': 68},
        {'city': 'chicago', 'temprature': 14, 'humdity': 65},
        {'city': 'orlando', 'temprature': 35, 'humdity': 75},
    ]
)

# Bare last expression so the notebook renders the frame.
us_weather

Unnamed: 0,city,temprature,humdity
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


In [19]:
# Stack the two frames vertically (axis=0 is pd.concat's default direction).
df = pd.concat(objs=[india_weather, us_weather], axis=0)

df

# Each input keeps its own row labels, so the combined index repeats them.
# Check with:
# df.index   -> 0, 1, 2, 0, 1, 2

Int64Index([0, 1, 2, 0, 1, 2], dtype='int64')

In [20]:
# ignore_index=True drops the inputs' row labels and renumbers the result.
df = pd.concat(
    objs=[india_weather, us_weather],
    ignore_index=True,
)

df

# Check with:
# df.index   -> runs 0..5, no repeated labels this time

Unnamed: 0,city,temprature,humdity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78
3,new york,21,68
4,chicago,14,65
5,orlando,35,75


In [54]:
# Concatenate and associate a key with each input frame. The keys become the
# outer level of a two-level row index, so each source can be selected by name.
df = pd.concat([india_weather, us_weather], keys=['india', 'usa'])

print(df)

# Selecting on the outer key returns that source's rows with their own labels.
print(df.loc['india'])
print(df.loc['usa'])

# Print the first row of the 'india' group. The row slice is interpolated
# directly into the f-string: a bare `{0}` inside an f-string is the integer
# literal 0 (not a str.format placeholder) and would print a stray "0" ahead
# of the table.
print(f"First row in the india dataframe \n{df.loc['india'].iloc[0:1]}")

# Print column: the last expression is displayed as the cell's output.
df.loc['usa'].city

             city  temprature  humdity
india 0    mumbai          32       80
      1     delhi          45       60
      2  banglore          30       78
usa   0  new york          21       68
      1   chicago          14       65
      2   orlando          35       75
       city  temprature  humdity
0    mumbai          32       80
1     delhi          45       60
2  banglore          30       78
       city  temprature  humdity
0  new york          21       68
1   chicago          14       65
2   orlando          35       75
First row in the india dataframe 
0      city  temprature  humdity
0  mumbai          32       80


0    new york
1     chicago
2     orlando
Name: city, dtype: object

In [63]:
# Concat dataframes side by side: axis='columns' (same as axis=1) places the
# inputs' columns next to each other instead of stacking rows.
temp_df = pd.DataFrame(
    [('mumbai', 32), ('delhi', 45), ('banglore', 30)],
    columns=['city', 'temprature'],
)

windspeed_df = pd.DataFrame(
    [('mumbai', 7), ('delhi', 12), ('banglore', 9)],
    columns=['city', 'windspeed'],
)

# Both inputs carry a 'city' column, so it appears twice in the result.
df = pd.concat([temp_df, windspeed_df], axis='columns')
df

Unnamed: 0,city,temprature,city.1,windspeed
0,mumbai,32,mumbai,7
1,delhi,45,delhi,12
2,banglore,30,banglore,9


In [66]:
# Gotcha!!
# Change the order of the cities in one of the dataframes, then concat
# side by side again and see what happens.
temp_df = pd.DataFrame(
    [('mumbai', 32), ('delhi', 45), ('banglore', 30)],
    columns=['city', 'temprature'],
)

windspeed_df = pd.DataFrame(
    [('delhi', 12), ('mumbai', 7), ('banglore', 9)],
    columns=['city', 'windspeed'],
)

df = pd.concat([temp_df, windspeed_df], axis='columns')
df

# Compare the two city columns: rows are paired by index label (0, 1, 2),
# not by city name, so mumbai's temperature ends up beside delhi's windspeed.




Unnamed: 0,city,temprature,city.1,windspeed
0,mumbai,32,delhi,12
1,delhi,45,mumbai,7
2,banglore,30,banglore,9


In [73]:
# Let's fix the above issue.
# axis=1 concatenation lines rows up by index label, so give each frame an
# explicit index in which the same city carries the same label.
temp_df = pd.DataFrame(
    [('mumbai', 32), ('delhi', 45), ('banglore', 30)],
    columns=['city', 'temprature'],
    index=[0, 1, 2],
)

windspeed_df = pd.DataFrame(
    [('delhi', 12), ('mumbai', 7), ('banglore', 9)],
    columns=['city', 'windspeed'],
    index=[1, 0, 2],  # note 1,0,2 rather than 0,1,2: delhi -> 1, mumbai -> 0
)

df = pd.concat([temp_df, windspeed_df], axis='columns')
df

Unnamed: 0,city,temprature,city.1,windspeed
0,mumbai,32,mumbai,7
1,delhi,45,delhi,12
2,banglore,30,banglore,9


In [74]:
# Join a DataFrame with a Series: the Series' name becomes the new column's
# header, and rows are matched on index label.
event_series = pd.Series(data=['Humid', 'Dry', 'Rain'], name='event')

# NOTE: df is rebuilt from itself, so re-running this cell appends another
# 'event' column each time.
df = pd.concat([df, event_series], axis='columns')
df

Unnamed: 0,city,temprature,city.1,windspeed,event
0,mumbai,32,mumbai,7,Humid
1,delhi,45,delhi,12,Dry
2,banglore,30,banglore,9,Rain
