In [1]:
import pandas as pd

# Sample weather readings for three cities in Nepal; each dict key becomes a column.
nepal_weather = pd.DataFrame(
    data={
        "city": ["kathmandu", "salyan", "pokhara"],
        "temperature": [32, 45, 30],
        "humidity": [80, 60, 78],
    }
)
nepal_weather  # last expression in the cell, so it is rendered as a table

Unnamed: 0,city,temperature,humidity
0,kathmandu,32,80
1,salyan,45,60
2,pokhara,30,78


In [2]:
# Sample weather readings for three cities in Spain, with the same columns as the Nepal frame.
spain_weather = pd.DataFrame(
    data={
        "city": ["Madrid", "Barcelona", "Sevilla"],
        "temperature": [40, 30, 32],
        "humidity": [70, 60, 73],
    }
)
spain_weather  # last expression in the cell, so it is rendered as a table

Unnamed: 0,city,temperature,humidity
0,Madrid,40,70
1,Barcelona,30,60
2,Sevilla,32,73


In [4]:
# Stack the two frames row-wise (the default axis); each frame keeps its own index labels.
df = pd.concat(
    [nepal_weather, spain_weather],
    axis="index",
)
df

Unnamed: 0,city,temperature,humidity
0,kathmandu,32,80
1,salyan,45,60
2,pokhara,30,78
0,Madrid,40,70
1,Barcelona,30,60
2,Sevilla,32,73


Here, each row keeps the index label it had in its original dataframe, so the labels 0-2 are repeated. If you want the combined dataframe to have a continuous index instead, pass `ignore_index=True` to `pd.concat`, as shown below.


In [5]:
# ignore_index=True discards the original labels and numbers the rows afresh.
df = pd.concat(
    objs=[nepal_weather, spain_weather],
    ignore_index=True,
)

In [6]:
# Display the concatenated frame to inspect its index.
df

Unnamed: 0,city,temperature,humidity
0,kathmandu,32,80
1,salyan,45,60
2,pokhara,30,78
3,Madrid,40,70
4,Barcelona,30,60
5,Sevilla,32,73


See, it worked like magic....

We can also pass `keys` when concatenating the dataframes. Each key labels the rows that came from the corresponding dataframe, which makes it easy to see, and later select, the rows from each source.

In [7]:
# One key per input frame; the keys become an outer index level over the original labels.
df = pd.concat(
    objs=[nepal_weather, spain_weather],
    keys=["nepal", "spain"],
)

In [8]:
# Display the keyed frame; the outer index level shows which frame each row came from.
df

Unnamed: 0,Unnamed: 1,city,temperature,humidity
nepal,0,kathmandu,32,80
nepal,1,salyan,45,60
nepal,2,pokhara,30,78
spain,0,Madrid,40,70
spain,1,Barcelona,30,60
spain,2,Sevilla,32,73


In [9]:
# Select only the rows that were concatenated under the "nepal" key.
df.loc["nepal"]

Unnamed: 0,city,temperature,humidity
0,kathmandu,32,80
1,salyan,45,60
2,pokhara,30,78


In [10]:
# Select only the rows that were concatenated under the "spain" key.
df.loc["spain"]

Unnamed: 0,city,temperature,humidity
0,Madrid,40,70
1,Barcelona,30,60
2,Sevilla,32,73


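Passing `axis=1` tells `pd.concat` to place the dataframes side by side as new columns, instead of stacking them as new rows (the default, `axis=0`). Rows are matched on their index labels.
This is useful when two dataframes describe the same rows with different columns, as in the example below.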

In [11]:
# Temperatures for three cities, with explicit index labels 0, 1, 2.
temperature_df = pd.DataFrame(
    data={
        "city": ["ktm", "pkr", "dhrn"],
        "temperature": [32, 45, 30],
    },
    index=[0, 1, 2],
)
temperature_df

Unnamed: 0,city,temperature
0,ktm,32
1,pkr,45
2,dhrn,30


In [13]:
# Wind speeds for two of the cities. The rows are listed in a different order
# than in temperature_df, and the index labels (1, 0) are given explicitly.
windspeed_df = pd.DataFrame(
    data={
        "city": ["pkr", "ktm"],
        "windspeed": [7, 12],
    },
    index=[1, 0],
)
windspeed_df

Unnamed: 0,city,windspeed
1,pkr,7
0,ktm,12


In [14]:


# Default row-wise concat: the frames do not share all columns, so the result
# has missing values wherever a frame lacks a column.
df = pd.concat(objs=[temperature_df, windspeed_df])
df



Unnamed: 0,city,temperature,windspeed
0,ktm,32.0,
1,pkr,45.0,
2,dhrn,30.0,
1,pkr,,7.0
0,ktm,,12.0


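Here, we can see that the rows of the windspeed dataframe are appended below the rows of the temperature dataframe, leaving missing values wherever a column does not apply. That is not what we want.
Instead, we set `axis=1` so that the second dataframe is concatenated as extra columns of the first, with rows matched on their index labels.
This gives a much clearer view of the data, although the shared `city` column now appears twice.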

In [21]:
# Column-wise concat: rows are aligned on index labels, not on position.
df = pd.concat(
    [temperature_df, windspeed_df],
    axis="columns",
)


In [22]:
# Display the column-wise result.
df

Unnamed: 0,city,temperature,city.1,windspeed
0,ktm,32,ktm,12.0
1,pkr,45,pkr,7.0
2,dhrn,30,,


Concatenating dataframes with series

In [28]:
# A named Series; the name is used as the column label when it is concatenated column-wise.
s = pd.Series(
    data=["Humid", "Rain", "Dry"],
    name="event",
)
s

0    Humid
1     Rain
2      Dry
Name: event, dtype: object

In [29]:
# Attach the Series as an extra column, aligned on index labels.
df = pd.concat(
    [temperature_df, s],
    axis="columns",
)
df

Unnamed: 0,city,temperature,event
0,ktm,32,Humid
1,pkr,45,Rain
2,dhrn,30,Dry


Merging dataframes on a common column joins the matching rows, so the key column is not repeated.
When two dataframes share a key column, this is usually a better fit than concatenating them.

In [31]:
# pandas is already imported as `pd` in the first cell, so it is not re-imported here.
# Lead actress for each Spider-Man movie; `movie` is the key column used for merging.
df1 = pd.DataFrame({
    "movie": ["spiderman", "amazing spiderman", "spiderman home"],
    "actress": ["kirsten dunst", "emma stone", "zendaya"],
})
df1

Unnamed: 0,movie,actress
0,spiderman,kirsten dunst
1,amazing spiderman,emma stone
2,spiderman home,zendaya


In [33]:
# Lead actor for the same three movies; shares the `movie` key column with df1.
df2 = pd.DataFrame(
    data={
        "movie": ["spiderman", "amazing spiderman", "spiderman home"],
        "actor": ["tobey maguire", "andrew garfield", "tom holland"],
    }
)
df2

Unnamed: 0,movie,actor
0,spiderman,tobey maguire
1,amazing spiderman,andrew garfield
2,spiderman home,tom holland


In [34]:
# Join the two frames on the shared `movie` column (inner join is the default).
df = pd.merge(left=df1, right=df2, on="movie", how="inner")

In [38]:
# Rebind `df` to the re-indexed frame instead of mutating it with inplace=True.
# `df` ends up indexed by "movie" either way, but the merged frame object is not modified.
df = df.set_index("movie")

In [39]:
# Display the merged frame, which is now indexed by movie.
df

Unnamed: 0_level_0,actress,actor
movie,Unnamed: 1_level_1,Unnamed: 2_level_1
spiderman,kirsten dunst,tobey maguire
amazing spiderman,emma stone,andrew garfield
spiderman home,zendaya,tom holland


By default, `pd.merge` performs an inner join of the two dataframes: only rows whose key appears in both are kept.
But what about rows whose key is not common to both? Let's see.




In [42]:
# pandas is already imported as `pd` in the first cell, so it is not re-imported here.
# Same actress table as before, plus "batman", which has no counterpart in the actor table.
df1 = pd.DataFrame({
    "movie": ["spiderman", "amazing spiderman", "spiderman home", "batman"],
    "actress": ["kirsten dunst", "emma stone", "zendaya", "maggie"],
})
df1


Unnamed: 0,movie,actress
0,spiderman,kirsten dunst
1,amazing spiderman,emma stone
2,spiderman home,zendaya
3,batman,maggie


In [43]:
# Actor table with "aquaman" as its fourth movie, alongside the three Spider-Man titles.
df2 = pd.DataFrame(
    data={
        "movie": ["spiderman", "amazing spiderman", "spiderman home", "aquaman"],
        "actor": ["tobey maguire", "andrew garfield", "tom holland", "jason momoa"],
    }
)
df2

Unnamed: 0,movie,actor
0,spiderman,tobey maguire
1,amazing spiderman,andrew garfield
2,spiderman home,tom holland
3,aquaman,jason momoa


In [44]:
# Default (inner) merge: only movies present in both frames are kept.
df = pd.merge(left=df1, right=df2, on="movie", how="inner")

In [45]:
# Display the inner-merge result.
df

Unnamed: 0,movie,actress,actor
0,spiderman,kirsten dunst,tobey maguire
1,amazing spiderman,emma stone,andrew garfield
2,spiderman home,zendaya,tom holland


Here, the movies batman and aquaman are missing from the result, along with their actress and actor names.
That is because each of them appears in only one of the dataframes, and the default merge keeps only the keys common to both.

Now it's time to choose a join strategy with the `how` argument, as in a database. An inner join keeps the intersection of the keys, and an outer join keeps their union.
A left join keeps every row of the first dataframe and a right join keeps every row of the second, leaving missing values where the other dataframe has no match.

In [47]:
# Outer join: keep every movie from either frame; unmatched cells are left missing.
df = pd.merge(left=df1, right=df2, on="movie", how="outer")

In [48]:
# Display the outer-join result.
df

Unnamed: 0,movie,actress,actor
0,spiderman,kirsten dunst,tobey maguire
1,amazing spiderman,emma stone,andrew garfield
2,spiderman home,zendaya,tom holland
3,batman,maggie,
4,aquaman,,jason momoa


In [49]:
# Left join: keep every movie from df1; actor is missing where df2 has no match.
dfl = pd.merge(left=df1, right=df2, on="movie", how="left")

In [50]:
# Display the left-join result.
dfl

Unnamed: 0,movie,actress,actor
0,spiderman,kirsten dunst,tobey maguire
1,amazing spiderman,emma stone,andrew garfield
2,spiderman home,zendaya,tom holland
3,batman,maggie,


In [51]:
# Right join: keep every movie from df2; actress is missing where df1 has no match.
dfr = pd.merge(left=df1, right=df2, on="movie", how="right")

In [52]:
# Display the right-join result.
dfr

Unnamed: 0,movie,actress,actor
0,spiderman,kirsten dunst,tobey maguire
1,amazing spiderman,emma stone,andrew garfield
2,spiderman home,zendaya,tom holland
3,aquaman,,jason momoa


Passing `indicator=True` adds a `_merge` column to the merged dataframe that records where each row came from: `both`, `left_only`, or `right_only`.

In [53]:
# Outer join with indicator=True, which adds a `_merge` column naming each row's source.
dfri = pd.merge(
    left=df1,
    right=df2,
    on="movie",
    how="outer",
    indicator=True,
)

In [54]:
# Display the outer-join result together with its `_merge` indicator column.
dfri

Unnamed: 0,movie,actress,actor,_merge
0,spiderman,kirsten dunst,tobey maguire,both
1,amazing spiderman,emma stone,andrew garfield,both
2,spiderman home,zendaya,tom holland,both
3,batman,maggie,,left_only
4,aquaman,,jason momoa,right_only


In the above dataframe we can see that batman came only from the left dataframe, while aquaman came only from the right one.
That is how `pd.merge` works with `indicator`.

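Use of suffixes

When both dataframes have non-key columns with the same name, the `suffixes` argument sets the endings appended to those names so that the two versions can be told apart.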

In [55]:


# First set of city readings; `temperature` and `humidity` also exist in the second frame.
df1 = pd.DataFrame(
    data={
        "city": ["new york", "chicago", "orlando", "baltimore"],
        "temperature": [21, 14, 35, 38],
        "humidity": [65, 68, 71, 75],
    }
)
df1



Unnamed: 0,city,temperature,humidity
0,new york,21,65
1,chicago,14,68
2,orlando,35,71
3,baltimore,38,75


In [56]:
# Second set of city readings with the same column names; only some cities overlap with df1.
df2 = pd.DataFrame(
    data={
        "city": ["chicago", "new york", "san diego"],
        "temperature": [21, 14, 35],
        "humidity": [65, 68, 71],
    }
)
df2

Unnamed: 0,city,temperature,humidity
0,chicago,21,65
1,new york,14,68
2,san diego,35,71


In [57]:


# Both frames carry `temperature` and `humidity`; the suffixes mark which frame
# each copy came from (first suffix for df1, second for df2).
df3 = pd.merge(
    left=df1,
    right=df2,
    on="city",
    how="outer",
    suffixes=("_first", "_second"),
)
df3



Unnamed: 0,city,temperature_first,humidity_first,temperature_second,humidity_second
0,new york,21.0,65.0,14.0,68.0
1,chicago,14.0,68.0,21.0,65.0
2,orlando,35.0,71.0,,
3,baltimore,38.0,75.0,,
4,san diego,,,35.0,71.0
