# Concatenating DataFrames

In [1]:
import pandas as pd
# Sample weather readings for three Indian cities, one row per city.
india_weather = pd.DataFrame(
    dict(
        city=["mumbai", "delhi", "banglore"],
        temperature=[32, 45, 30],
        humidity=[80, 60, 78],
    )
)
india_weather

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78


In [2]:
# Sample weather readings for three US cities, same columns as the India frame.
us_weather = pd.DataFrame(
    dict(
        city=["new york", "chicago", "orlando"],
        temperature=[21, 14, 35],
        humidity=[68, 65, 75],
    )
)
us_weather

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65
2,orlando,35,75


In [5]:
# Stack the two frames vertically. The mapping keys ("India", "Us") become the
# outer level of the resulting index, so each row stays tagged with the frame
# it came from. Pass ignore_index=True instead of keys when a fresh 0..n-1
# index is wanted.
df = pd.concat({"India": india_weather, "Us": us_weather})
df

Unnamed: 0,Unnamed: 1,city,temperature,humidity
India,0,mumbai,32,80
India,1,delhi,45,60
India,2,banglore,30,78
Us,0,new york,21,68
Us,1,chicago,14,65
Us,2,orlando,35,75


In [6]:
df.xs("India")  # cross-section: every row stored under the "India" key

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78


# More examples

In [8]:
# Temperature readings only, indexed 0..2 by default.
df_temp = pd.DataFrame(
    dict(
        city=["mumbai", "delhi", "banglore"],
        temperature=[32, 45, 30],
    )
)
df_temp

Unnamed: 0,city,temperature
0,mumbai,32
1,delhi,45
2,banglore,30


In [14]:
# The explicit index labels the delhi row 1 and the mumbai row 0, i.e. the
# reverse of the default 0, 1 numbering.
df_windspeed = pd.DataFrame(
    dict(city=["delhi", "mumbai"], windspeed=[7, 12]),
    index=[1, 0],
)
df_windspeed

Unnamed: 0,city,windspeed
1,delhi,7
0,mumbai,12


In [15]:
# Put the temperature and windspeed frames side by side. Rows are paired by
# index label rather than by position, and a label missing from one frame
# yields NaN in that frame's columns.
df2 = pd.concat([df_temp, df_windspeed], axis="columns")
df2

Unnamed: 0,city,temperature,city.1,windspeed
0,mumbai,32,mumbai,12.0
1,delhi,45,delhi,7.0
2,banglore,30,,


In [17]:
# A named Series of weather events, one entry per row label 0..2.
s = pd.Series(data=["Humid", "Dry", "Rain"], name="event")
s

0    Humid
1      Dry
2     Rain
Name: event, dtype: object

In [19]:
# Attach the event Series to the temperature frame as an extra column;
# the Series name becomes the column name.
df = pd.concat([df_temp, s], axis="columns")
df

Unnamed: 0,city,temperature,event
0,mumbai,32,Humid
1,delhi,45,Dry
2,banglore,30,Rain


# Merge

In [28]:
# Left-hand frame for the merge examples: temperature per city.
df1 = pd.DataFrame(
    dict(
        city=["new york", "chicago", "orlando", "baltimore"],
        temperature=[21, 14, 35, 32],
    )
)
df1

Unnamed: 0,city,temperature
0,new york,21
1,chicago,14
2,orlando,35
3,baltimore,32


In [30]:
# Right-hand frame for the merge examples: humidity per city.
df2 = pd.DataFrame(
    dict(
        city=["chicago", "new york", "san francisco"],
        humidity=[65, 68, 71],
    )
)
df2

Unnamed: 0,city,humidity
0,chicago,65
1,new york,68
2,san francisco,71


In [33]:
# `on` names the column to join on. The default how="inner" keeps only the
# cities that appear in both frames.
df3 = df1.merge(df2, on="city")
df3

Unnamed: 0,city,temperature,humidity
0,new york,21,68
1,chicago,14,65


In [37]:
# how="outer" keeps cities found in either frame and fills the missing side
# with NaN. Other choices are "left", "right" and "inner" (the default).
# Passing indicator=True would add a column reporting whether each row came
# from the left frame, the right frame, or both.
df3 = df1.merge(df2, how="outer", on="city")
df3

Unnamed: 0,city,temperature,humidity
0,new york,21.0,68.0
1,chicago,14.0,65.0
2,orlando,35.0,
3,baltimore,32.0,
4,san francisco,,71.0


In [39]:
# Left-hand frame for the suffixes example; shares the temperature and
# humidity column names with the right-hand frame.
df5 = pd.DataFrame(
    dict(
        city=["new york", "chicago", "orlando", "baltimore"],
        temperature=[21, 14, 35, 38],
        humidity=[65, 68, 71, 75],
    )
)
df5

Unnamed: 0,city,temperature,humidity
0,new york,21,65
1,chicago,14,68
2,orlando,35,71
3,baltimore,38,75


In [40]:
# Right-hand frame for the suffixes example.
df6 = pd.DataFrame(
    dict(
        city=["chicago", "new york", "san diego"],
        temperature=[30, 40, 50],
        humidity=[45, 55, 65],
    )
)
df6

Unnamed: 0,city,temperature,humidity
0,chicago,30,45
1,new york,40,55
2,san diego,50,65


In [42]:
# Both frames carry temperature and humidity columns; `suffixes` is appended
# to the colliding names from the left and right frame respectively.
df7 = df5.merge(df6, on="city", suffixes=("_left", "_right"))
df7

Unnamed: 0,city,temperature_left,humidity_left,temperature_right,humidity_right
0,new york,21,65,40,55
1,chicago,14,68,30,45
