# Concat

In [None]:
# Concatenation is the operation of joining two or more dataframes together.

In [1]:
import pandas as pd

In [2]:
# Suppose the weather readings are kept in two separate dataframes.
# This first one holds the readings for the Indian cities.

india_weather = pd.DataFrame(
    dict(
        city=["mumbai", "delhi", "banglore"],
        temperature=[32, 45, 30],
        humidity=[80, 60, 78],
    )
)

india_weather

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78


In [3]:
# The second dataframe holds the readings for the US cities, using the
# same three columns.
us_weather = pd.DataFrame.from_dict(
    {
        "city": ["new york", "chicago", "orlando"],
        "temperature": [22, 15, 35],
        "humidity": [60, 50, 70],
    }
)

us_weather

Unnamed: 0,city,temperature,humidity
0,new york,22,60
1,chicago,15,50
2,orlando,35,70


In [5]:
# pd.concat() takes a list of the dataframes you want to join and, by
# default, stacks their rows one below the other.

df = pd.concat(objs=[india_weather, us_weather])
df



Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78
0,new york,22,60
1,chicago,15,50
2,orlando,35,70


In [6]:
# A plain concat keeps the index of each original dataframe, so the labels
# repeat. To get one continuous index instead, pass ignore_index=True.

df = pd.concat(objs=[india_weather, us_weather], ignore_index=True)
df

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78
3,new york,22,60
4,chicago,15,50
5,orlando,35,70


In [7]:
# Retrieving the weather data of the Indian cities from the combined dataframe.

# For this we use keys: each dataframe passed in the list is associated
# with its own key through the `keys` argument.

df = pd.concat(objs=[india_weather, us_weather], keys=["india", "us"])
df

# The keys create an additional index level that labels each subset of the
# combined dataframe. This extra index is useful when, after joining the
# dataframes, we want to get one of the original dataframes back.

Unnamed: 0,Unnamed: 1,city,temperature,humidity
india,0,mumbai,32,80
india,1,delhi,45,60
india,2,banglore,30,78
us,0,new york,22,60
us,1,chicago,15,50
us,2,orlando,35,70


In [8]:
# To retrieve the data stored under one key, use loc[] with that key.

df.loc['india']

Unnamed: 0,city,temperature,humidity
0,mumbai,32,80
1,delhi,45,60
2,banglore,30,78


In [9]:
# Likewise, select the rows stored under the 'us' key.
df.loc['us']

Unnamed: 0,city,temperature,humidity
0,new york,22,60
1,chicago,15,50
2,orlando,35,70


In [13]:
# So far we stacked two dataframes on top of each other (row-wise). Sometimes
# we instead need to attach a dataframe as additional columns rather than as
# additional rows.

# For example, say we have two dataframes: one of temperature and one of
# windspeed. This is the temperature one.

temperature_df = pd.DataFrame(
    dict(
        city=["mumbai", "delhi", "banglore"],
        temperature=[32, 45, 30],
    )
)

temperature_df

Unnamed: 0,city,temperature
0,mumbai,32
1,delhi,45
2,banglore,30


In [14]:
# The windspeed dataframe for the same three cities.
# NOTE(review): the readings are stored under a column labelled
# 'temperature' -- presumably 'windspeed' was intended. The label is kept
# as-is because the outputs shown in this notebook depend on it.
windspeed_df = pd.DataFrame.from_dict(
    {
        "city": ["mumbai", "delhi", "banglore"],
        "temperature": [12, 25, 30],
    }
)

windspeed_df

Unnamed: 0,city,temperature
0,mumbai,12
1,delhi,25
2,banglore,30


In [12]:
# A plain concat of these two dataframes simply stacks one on top of the
# other, which is not what we want here.

df = pd.concat(objs=[temperature_df, windspeed_df])
df

Unnamed: 0,city,temperature
0,mumbai,32
1,delhi,45
2,banglore,30
0,mumbai,12
1,delhi,25
2,banglore,30


In [15]:
# To attach the second dataframe as columns instead, pass axis=1.
# (The default is axis=0, which appends rows.)

df = pd.concat(objs=[temperature_df, windspeed_df], axis=1)
df

Unnamed: 0,city,temperature,city.1,temperature.1
0,mumbai,32,mumbai,12
1,delhi,45,delhi,25
2,banglore,30,banglore,30


In [16]:
# What if the dataframes do not line up perfectly, e.g. the cities appear in
# a different order (or some are missing) in one of them?

temperature_df = pd.DataFrame(
    data={
        "city": ["mumbai", "delhi", "banglore"],
        "temperature": [32, 45, 30],
    }
)

temperature_df

Unnamed: 0,city,temperature
0,mumbai,32
1,delhi,45
2,banglore,30


In [17]:
# This time the second dataframe lists only two cities, and in a different
# order from temperature_df.
windspeed_df = pd.DataFrame(
    dict(
        city=["delhi", "mumbai"],
        temperature=[12, 25],
    )
)

windspeed_df

Unnamed: 0,city,temperature
0,delhi,12
1,mumbai,25


In [18]:
# Column-wise concat of the mismatched dataframes: rows are paired up by
# their index labels (0, 1, 2), not by city name.
df = pd.concat(objs=[temperature_df, windspeed_df], axis=1)
df

Unnamed: 0,city,temperature,city.1,temperature.1
0,mumbai,32,delhi,12.0
1,delhi,45,mumbai,25.0
2,banglore,30,,


In [19]:
# The result above is not correct: in the same row one city column says
# mumbai while the other says delhi. The rows need to line up by city.
# To achieve that, pass the index argument when building each dataframe so
# that the same city gets the same index label in both.

temperature_df = pd.DataFrame(
    data=dict(
        city=["mumbai", "delhi", "banglore"],
        temperature=[32, 45, 30],
    ),
    index=[0, 1, 2],
)

temperature_df

Unnamed: 0,city,temperature
0,mumbai,32
1,delhi,45
2,banglore,30


In [21]:
# Index labels mirror the ones used in temperature_df:
# 1 for delhi and 0 for mumbai.
windspeed_df = pd.DataFrame(
    data=dict(
        city=["delhi", "mumbai"],
        temperature=[12, 25],
    ),
    index=[1, 0],
)

windspeed_df

Unnamed: 0,city,temperature
1,delhi,12
0,mumbai,25


In [22]:
# With matching index labels, the column-wise concat now pairs each city
# with its own row from the second dataframe.
df = pd.concat(objs=[temperature_df, windspeed_df], axis=1)
df

Unnamed: 0,city,temperature,city.1,temperature.1
0,mumbai,32,mumbai,25.0
1,delhi,45,delhi,12.0
2,banglore,30,,


# Series

In [23]:
# A dataframe can also be joined with a Series.

# A Series is a one-dimensional array of data. It can hold data of any type:
# strings, integers, floats, dictionaries, lists, booleans, and more.

temperature_df = pd.DataFrame.from_dict(
    {
        "city": ["mumbai", "delhi", "banglore"],
        "temperature": [32, 45, 30],
    }
)

temperature_df

Unnamed: 0,city,temperature
0,mumbai,32
1,delhi,45
2,banglore,30


In [24]:
# A named Series with one weather event per city row.
s = pd.Series(
    data=["Humid", "Dry", "Rain"],
    name="event",
)
s

0    Humid
1      Dry
2     Rain
Name: event, dtype: object

In [25]:
# Attach the Series to the dataframe as a new column; the Series' name
# ('event') is used as the column label.
df = pd.concat(objs=[temperature_df, s], axis=1)
df

Unnamed: 0,city,temperature,event
0,mumbai,32,Humid
1,delhi,45,Dry
2,banglore,30,Rain
