In [1]:
import pandas as pd
import numpy as np
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides deprecation warnings from pandas/numpy —
# consider narrowing the filter to a specific category or module.
warnings.filterwarnings('ignore')

## 1. Combining DataFrames

In [2]:
# Original customer table: one row per customer, labelled 0-3.
# customerID values are kept as strings; sales values are integers.
cust_data = pd.DataFrame(
    data={
        'customerID': ['101', '102', '103', '104'],
        'category': ['Medium', 'Medium', 'High', 'Low'],
        'first_visit': ['yes', 'no', 'yes', 'yes'],
        'sales': [123, 52, 214, 663],
    },
    index=[0, 1, 2, 3],
)

# Second customer table: overlaps cust_data on customers 101/103/104, adds 105,
# and is labelled 4-7 so its index does not collide with cust_data's 0-3.
cust_data_new = pd.DataFrame(
    data={
        'customerID': ['101', '103', '104', '105'],
        'distance': [12, 9, 44, 21],
        'sales': [123, 214, 663, 331],
    },
    index=[4, 5, 6, 7],
)

In [3]:
# Display the original customer table (index 0-3).
cust_data

Unnamed: 0,customerID,category,first_visit,sales
0,101,Medium,yes,123
1,102,Medium,no,52
2,103,High,yes,214
3,104,Low,yes,663


In [4]:
# Display the second customer table (index 4-7).
cust_data_new

Unnamed: 0,customerID,distance,sales
4,101,12,123
5,103,9,214
6,104,44,663
7,105,21,331


In [5]:
# Concatenate by row (axis=0): the frames are stacked on top of each other and
# any column missing from one of them (category, first_visit, distance) is NaN there.
pd.concat(
    objs=[cust_data, cust_data_new],
    axis=0,
)

Unnamed: 0,customerID,category,first_visit,sales,distance
0,101,Medium,yes,123,
1,102,Medium,no,52,
2,103,High,yes,214,
3,104,Low,yes,663,
4,101,,,123,12.0
5,103,,,214,9.0
6,104,,,663,44.0
7,105,,,331,21.0


In [6]:
# Concatenate by column (axis=1): the frames are placed side by side and aligned
# on the index. The labels 0-3 and 4-7 do not overlap, so every row is NaN on one side.
pd.concat(
    objs=[cust_data, cust_data_new],
    axis=1,
)

Unnamed: 0,customerID,category,first_visit,sales,customerID.1,distance,sales.1
0,101.0,Medium,yes,123.0,,,
1,102.0,Medium,no,52.0,,,
2,103.0,High,yes,214.0,,,
3,104.0,Low,yes,663.0,,,
4,,,,,101.0,12.0,123.0
5,,,,,103.0,9.0,214.0
6,,,,,104.0,44.0,663.0
7,,,,,105.0,21.0,331.0


In [7]:
# An outer merge keeps the union of the `on` keys: every customerID that appears
# in either frame gets a row, with NaN where the other frame has no match.
# The shared `sales` column comes back as sales_x (left) and sales_y (right).
cust_data.merge(cust_data_new, on='customerID', how='outer')

Unnamed: 0,customerID,category,first_visit,sales_x,distance,sales_y
0,101,Medium,yes,123.0,12.0,123.0
1,102,Medium,no,52.0,,
2,103,High,yes,214.0,9.0,214.0
3,104,Low,yes,663.0,44.0,663.0
4,105,,,,21.0,331.0


In [8]:
# An inner merge keeps the intersection of the `on` keys: only customerIDs
# present in both frames survive.
cust_data.merge(cust_data_new, on='customerID', how='inner')

Unnamed: 0,customerID,category,first_visit,sales_x,distance,sales_y
0,101,Medium,yes,123,12,123
1,103,High,yes,214,9,214
2,104,Low,yes,663,44,663


In [9]:
# Quarterly figures for Q1/Q2, labelled by item (I0, I1, I2).
data_quar = pd.DataFrame(
    data={
        'Q1': [101, 102, 103],
        'Q2': [201, 202, 203],
    },
    index=['I0', 'I1', 'I2'],
)

# Quarterly figures for Q3/Q4; the index (I0, I2, I3) only partly overlaps data_quar's.
data_quar_new = pd.DataFrame(
    data={
        'Q3': [301, 302, 303],
        'Q4': [401, 402, 403],
    },
    index=['I0', 'I2', 'I3'],
)

In [10]:
# Display the Q1/Q2 table (index I0, I1, I2).
data_quar

Unnamed: 0,Q1,Q2
I0,101,201
I1,102,202
I2,103,203


In [11]:
# Display the Q3/Q4 table (index I0, I2, I3).
data_quar_new

Unnamed: 0,Q3,Q4
I0,301,401
I2,302,402
I3,303,403


In [12]:
# join aligns the two frames on their index. `how` accepts outer, inner, left
# and right, and they work the same as in merge; with how='right' every index
# label of data_quar_new is kept and Q1/Q2 are NaN where data_quar has no such label.
data_quar.join(other=data_quar_new, how='right')

Unnamed: 0,Q1,Q2,Q3,Q4
I0,101.0,201.0,301,401
I2,103.0,203.0,302,402
I3,,,303,403


## 2. Saving and Loading DataFrames