## Concatenation and merging of DataFrames

In [1]:
import pandas as pd

### Create a DataFrame from a Dictionary

In [3]:
# Sample weather readings for four Indian cities.
# Each keyword becomes a column; the lists are aligned by position.
weather_data = dict(
    city=["kolkata", "bhuj", "rajkot", "surat"],
    temperature=[38, 43, 33, 36],
    humidity=[89, 56, 76, 67],
)
india = pd.DataFrame(data=weather_data)
india

Unnamed: 0,city,temperature,humidity
0,kolkata,38,89
1,bhuj,43,56
2,rajkot,33,76
3,surat,36,67


In [4]:
# Sample weather readings for the second ("US") group of cities.
# NOTE(review): "torronto" looks like a misspelling of Toronto, which is a
# Canadian city; the value is left untouched so the recorded outputs stay in sync.
weather_data = dict(
    city=["nyc", "los angeles", "las vegas", "torronto"],
    temperature=[28, 13, 23, 29],
    humidity=[59, 36, 66, 67],
)
us = pd.DataFrame(data=weather_data)
us

Unnamed: 0,city,temperature,humidity
0,nyc,28,59
1,los angeles,13,36
2,las vegas,23,66
3,torronto,29,67


### Concatenate two DataFrames 

In [5]:
# Stack the two frames vertically.
# Each frame keeps its own row labels, so 0..3 appears twice in the result.
df = pd.concat(objs=[india, us])
df

Unnamed: 0,city,temperature,humidity
0,kolkata,38,89
1,bhuj,43,56
2,rajkot,33,76
3,surat,36,67
0,nyc,28,59
1,los angeles,13,36
2,las vegas,23,66
3,torronto,29,67


In [6]:
# Stack the frames again, this time discarding the original row labels
# so the result gets a fresh 0..7 index.
df = pd.concat(
    objs=[india, us],
    ignore_index=True,
)
df

Unnamed: 0,city,temperature,humidity
0,kolkata,38,89
1,bhuj,43,56
2,rajkot,33,76
3,surat,36,67
4,nyc,28,59
5,los angeles,13,36
6,las vegas,23,66
7,torronto,29,67


In [7]:
# pd.concat stacks along the rows (axis=0) unless told otherwise.
# Asking for the column axis places the frames side by side instead,
# aligning rows on their index labels.
df = pd.concat(objs=[india, us], axis="columns")
df

Unnamed: 0,city,temperature,humidity,city.1,temperature.1,humidity.1
0,kolkata,38,89,nyc,28,59
1,bhuj,43,56,los angeles,13,36
2,rajkot,33,76,las vegas,23,66
3,surat,36,67,torronto,29,67


### Merge operation

In [8]:
# The merge examples use two small per-city frames: temperature readings
# (built here) and humidity readings.

# Temperature readings keyed by city name
data_temp = dict(
    city=["bhuj", "surat", "madhapar", "chennai"],
    temp=[45, 34, 47, 26],
)
temp = pd.DataFrame(data=data_temp)
temp

Unnamed: 0,city,temp
0,bhuj,45
1,surat,34
2,madhapar,47
3,chennai,26


In [11]:
# Humidity readings keyed by city name
# (three cities only: there is no entry for madhapar)
data_humidity = dict(
    city=["bhuj", "surat", "chennai"],
    humidity=[35, 64, 76],
)
humidity = pd.DataFrame(data=data_humidity)
humidity

Unnamed: 0,city,humidity
0,bhuj,35
1,surat,64
2,chennai,76


In [14]:
# Join the temperature and humidity frames on their shared "city" column.
# This is an inner join (the default for merge): only cities present in both
# frames survive, so madhapar, which has no humidity record, is dropped.
df = temp.merge(humidity, how="inner", on="city")
df

Unnamed: 0,city,temp,humidity
0,bhuj,45,35
1,surat,34,64
2,chennai,26,76


In [19]:
# Outer join: keep every city that appears in either frame.
# Where one side has no matching record the value is filled with NaN
# (Not a Number), as with madhapar's humidity here.
df = temp.merge(humidity, how="outer", on="city")
df

Unnamed: 0,city,temp,humidity
0,bhuj,45,35.0
1,surat,34,64.0
2,madhapar,47,
3,chennai,26,76.0
