## When we want to join two or more data frames, we use `pd.concat`

In [1]:
import pandas as pd

In [9]:
# Weather readings for India: the first of the two frames that get concatenated.
india_weather = pd.DataFrame(
    data=[
        ('mumbai', 32, 80),
        ('delhi', 45, 60),
        ('bengaluru', 30, 78),
    ],
    columns=['city', 'temperature', 'humidity'],
)
india_weather

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,bengaluru,30,78


In [10]:
# Weather readings for the US, using the same three columns as the India frame.
us_weather = pd.DataFrame(
    dict(
        city=['new yourk', 'chicago', 'orlando'],
        temperature=[21, 14, 35],
        humidity=[68, 65, 75],
    )
)
us_weather

Unnamed: 0,city,temperature,humidity
0,new yourk,21,68
1,chicago,14,65
2,orlando,35,75


In [11]:
# Combine both frames into one so the data of the two countries sits in a single frame.
df = pd.concat(objs=[india_weather, us_weather])
df
# Note: every input frame keeps its original index, so the labels 0, 1, 2 appear twice.

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,bengaluru,30,78
0,new yourk,21,68
1,chicago,14,65
2,orlando,35,75


In [12]:
# ignore_index=True drops the original row labels and renumbers the result,
# which gives one continuous index sequence.
df = pd.concat(objs=[india_weather, us_weather], ignore_index=True)
df
# pandas reference: https://pandas.pydata.org/docs/reference/api/pandas.concat.html

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,bengaluru,30,78
3,new yourk,21,68
4,chicago,14,65
5,orlando,35,75


### Use of keys


In [14]:
# keys=... labels the rows of each input frame with an outer index level,
# so the result is indexed by (key, original row label).
df = pd.concat(objs=[india_weather, us_weather], keys=['india', 'us'])
df

Unnamed: 0,Unnamed: 1,city,temperature,humidity
india,0,mumbai,32,80
india,1,delhi,45,60
india,2,bengaluru,30,78
us,0,new yourk,21,68
us,1,chicago,14,65
us,2,orlando,35,75


In [18]:
# The keys become index labels, so .loc with a key selects just the rows
# that were concatenated under that key (here: the 'us' rows).
df.loc['us']

Unnamed: 0,city,temperature,humidity
0,new yourk,21,68
1,chicago,14,65
2,orlando,35,75


### Concatenating data frames along columns

In [None]:
# Here we want to combine the 'temperature' and 'wind_speed' data frames of the same 3 cities.

In [21]:
# Temperature per city: one of the two frames to be placed side by side.
temperature_df = pd.DataFrame(
    data=[
        ('new yourk', 21),
        ('chicago', 14),
        ('orlando', 35),
    ],
    columns=['city', 'temperature'],
)
temperature_df

Unnamed: 0,city,temperature
0,new yourk,21
1,chicago,14
2,orlando,35


In [22]:
# Wind speed for the same three cities, listed in the same order as temperature_df.
windspeed_df = pd.DataFrame(
    dict(
        city=['new yourk', 'chicago', 'orlando'],
        wind_speed=[7, 12, 9],
    )
)
windspeed_df

Unnamed: 0,city,wind_speed
0,new yourk,7
1,chicago,12
2,orlando,9


In [24]:
# Goal: have wind_speed appear as an extra column next to temperature.
# Concatenating along the default axis (rows) does not achieve that: the frames
# are stacked on top of each other and the column a frame lacks is filled with NaN.
# sort=False states the column-ordering behaviour explicitly instead of relying on
# the version-dependent default; older pandas releases emit a FutureWarning when
# the columns of the inputs are not aligned and `sort` is left unspecified.
df2 = pd.concat([temperature_df, windspeed_df], sort=False)
df2
# So this is not the output we wanted: the rows were appended, not joined by column.

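FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.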


  


Unnamed: 0,city,temperature,wind_speed
0,new yourk,21.0,
1,chicago,14.0,
2,orlando,35.0,
0,new yourk,,7.0
1,chicago,,12.0
2,orlando,,9.0


In [49]:
# Side-by-side concatenation: axis='columns' (equivalent to axis=1) places the
# frames next to each other, and each key becomes the top-level column label
# of the frame it belongs to.
df3 = pd.concat(
    objs=[temperature_df, windspeed_df],
    axis='columns',
    keys=['City_one', 'City_two'],
)
df3
# Here we get the layout we wanted: one row per index label, with the columns of both frames.

Unnamed: 0_level_0,City_one,City_one,City_two,City_two
Unnamed: 0_level_1,city,temperature,city,wind_speed
0,mumbai,21,delhi,7.0
1,delhi,14,mumbai,12.0
2,bangalore,35,,


### Observation: what if the data is not aligned, e.g. the bangalore wind speed is missing and the cities are in a different order

In [32]:
# Temperature for three Indian cities, created with the default 0..2 index.
temperature_df_2 = pd.DataFrame(
    dict(
        city=['mumbai', 'delhi', 'bangalore'],
        temperature=[21, 14, 35],
    )
)
temperature_df_2

Unnamed: 0,city,temperature
0,mumbai,21
1,delhi,14
2,bangalore,35


In [33]:
# Wind speed for only two of the cities, and listed in a different order
# than in temperature_df_2 (delhi first, then mumbai).
windspeed_df_2 = pd.DataFrame(
    data=[
        ('delhi', 7),
        ('mumbai', 12),
    ],
    columns=['city', 'wind_speed'],
)
windspeed_df_2

Unnamed: 0,city,wind_speed
0,delhi,7
1,mumbai,12


In [40]:
# Column-wise concat pairs rows by index label, not by city name. Both frames
# were created with the default index, so row 0 of one frame is placed next to
# row 0 of the other regardless of which city each row holds.
df4 = pd.concat(objs=[temperature_df_2, windspeed_df_2], axis='columns')
df4
# This is not the output we expected: the cities of the two frames are not matched up.

Unnamed: 0,city,temperature,city.1,wind_speed
0,mumbai,21,mumbai,12.0
1,delhi,14,delhi,7.0
2,bangalore,35,,


In [34]:
# To line the frames up, pass an explicit index when creating each data frame,
# so that a given label always stands for the same city.
temperature_df_3 = pd.DataFrame(
    data=dict(
        city=['mumbai', 'delhi', 'bangalore'],
        temperature=[21, 14, 35],
    ),
    index=[0, 1, 2],
)
temperature_df_3

Unnamed: 0,city,temperature
0,mumbai,21
1,delhi,14
2,bangalore,35


In [37]:
# Same wind-speed rows as before, but each row now carries the index label of
# its city in temperature_df_3 (delhi -> 1, mumbai -> 0).
windspeed_df_3 = pd.DataFrame(
    data=[
        ('delhi', 7),
        ('mumbai', 12),
    ],
    columns=['city', 'wind_speed'],
    index=[1, 0],
)
windspeed_df_3

Unnamed: 0,city,wind_speed
1,delhi,7
0,mumbai,12


In [39]:
# With matching index labels on both frames, column-wise concat pairs each
# city's temperature with its own wind speed; a label that exists in only one
# frame gets NaN in the other frame's columns.
df5 = pd.concat(objs=[temperature_df_3, windspeed_df_3], axis='columns')
df5

Unnamed: 0,city,temperature,city.1,wind_speed
0,mumbai,21,mumbai,12.0
1,delhi,14,delhi,7.0
2,bangalore,35,,


### Joining a DataFrame with a Series

In [41]:
# Display the temperature frame that a Series will be attached to next.
temperature_df

Unnamed: 0,city,temperature
0,mumbai,21
1,delhi,14
2,bangalore,35


In [44]:
# A named Series of weather events; it uses the default 0..2 index.
s = pd.Series(data=['Rain', 'Sunny', 'Rain'], name='event')
s

0     Rain
1    Sunny
2     Rain
Name: event, dtype: object

In [46]:
# A Series can be concatenated like a frame: along the column axis it is added
# as one more column, labelled with the Series' name.
df6 = pd.concat(objs=[temperature_df, s], axis='columns')
df6

Unnamed: 0,city,temperature,event
0,mumbai,21,Rain
1,delhi,14,Sunny
2,bangalore,35,Rain
