In [1]:
import pandas as pd 
import numpy as np

## 1. Merging Datasets on Keys

In [2]:
# Left-hand table for the merge examples: one row per student, keyed by 'name'.
df1 = pd.DataFrame(
    {
        "name": ["walid", "bora", "hamza"],
        "points": [10, 20, 30],
    }
)
df1

Unnamed: 0,name,points
0,walid,10
1,bora,20
2,hamza,30


In [35]:
# Right-hand table: shares the 'name' column with df1, but only two of the
# three names overlap ('gri3a' is not in df1, 'hamza' is not in df2).
df2 = pd.DataFrame(
    {
        "name": ["walid", "bora", "gri3a"],
        "age": [21, 21, 22],
    }
)
df2

Unnamed: 0,name,age
0,walid,21
1,bora,21
2,gri3a,22


In [5]:
# Merge the two tables. With no `on=` argument, pandas joins on the columns
# the frames have in common (here 'name'); how="inner" is the default.
pd.merge(left=df1, right=df2, how="inner")

Unnamed: 0,name,points,age
0,walid,10,21
1,bora,20,21


In [6]:
# Name the shared key column explicitly instead of relying on inference.
pd.merge(left=df1, right=df2, on="name", how="inner")

Unnamed: 0,name,points,age
0,walid,10,21
1,bora,20,21


In [7]:
# The previous examples used the default how='inner': only keys present in
# both frames (the intersection) are kept.

In [8]:
# Outer join: keep the union of keys; cells with no match become NaN.
pd.merge(left=df1, right=df2, how="outer")

Unnamed: 0,name,points,age
0,walid,10.0,21.0
1,bora,20.0,21.0
2,hamza,30.0,
3,gri3a,,22.0


In [9]:
# Left join: keep every row of df1; 'age' is NaN where df2 has no match.
pd.merge(left=df1, right=df2, how="left")

Unnamed: 0,name,points,age
0,walid,10,21.0
1,bora,20,21.0
2,hamza,30,


In [12]:
# Same data as df2, but the key column is called 'student_name' instead of
# 'name', so it no longer matches df1's key column by name.
df3 = pd.DataFrame(
    {
        "student_name": ["walid", "bora", "gri3a"],
        "age": [21, 21, 22],
    }
)
df3

Unnamed: 0,student_name,age
0,walid,21
1,bora,21
2,gri3a,22


In [13]:
# The key columns have different names in each frame, so name them
# separately; both key columns are kept in the result.
pd.merge(
    left=df1,
    right=df3,
    left_on="name",
    right_on="student_name",
)


Unnamed: 0,name,points,student_name,age
0,walid,10,walid,21
1,bora,20,bora,21


## 2. Merging Datasets on Index

In [15]:
# Random integers in [0, 100) drawn from a *seeded* generator, so the frame is
# identical on every Restart & Run All. (The output saved with this cell came
# from an unseeded run, so the numbers will change once on re-execution.)
df4 = pd.DataFrame(
    np.random.default_rng(0).integers(100, size=(4, 3)),
    index=['c', 'd', 'a', 'e'],
    columns=['green', 'yellow', 'red'],
)
df4

Unnamed: 0,green,yellow,red
c,30,37,32
d,53,47,24
a,8,88,4
e,75,41,76


In [17]:
# Random integers in [0, 100) drawn from a *seeded* generator, so the frame is
# identical on every Restart & Run All. (The output saved with this cell came
# from an unseeded run, so the numbers will change once on re-execution.)
# Row labels are 'c', 'd', 'a' only.
df5 = pd.DataFrame(
    np.random.default_rng(1).integers(100, size=(3, 4)),
    index=['c', 'd', 'a'],
    columns=['blue', 'black', 'pink', 'purple'],
)
df5

Unnamed: 0,blue,black,pink,purple
c,5,60,2,8
d,47,85,97,26
a,42,5,31,10


In [19]:
# Join on the row labels of both frames rather than on a column.
# The default is an inner join; pass how='outer' to keep unmatched labels too.
pd.merge(
    left=df4,
    right=df5,
    left_index=True,
    right_index=True,
)

Unnamed: 0,green,yellow,red,blue,black,pink,purple
c,30,37,32,5,60,2,8
d,53,47,24,47,85,97,26
a,8,88,4,42,5,31,10


## 3. Concatenating Along an Axis (concat)

In [20]:
# Four values labelled 'a'..'d'.
ps1 = pd.Series([1, 2, 3, 4], index=list("abcd"))
ps1

a    1
b    2
c    3
d    4
dtype: int64

In [22]:
# Two values whose labels ('e', 'f') do not overlap with ps1's.
ps2 = pd.Series([5, 6], index=list("ef"))
ps2

e    5
f    6
dtype: int64

In [24]:
# Stack the two Series end to end (axis="index" is the default, axis=0).
pd.concat([ps1, ps2], axis="index")

a    1
b    2
c    3
d    4
e    5
f    6
dtype: int64

In [25]:
# Place the Series side by side as columns; labels are aligned, and a label
# missing from one Series yields NaN in that column.
pd.concat([ps1, ps2], axis="columns")

Unnamed: 0,0,1
a,1.0,
b,2.0,
c,3.0,
d,4.0,
e,,5.0
f,,6.0


In [26]:
# Two values whose labels ('a', 'b') are a subset of ps1's labels.
ps3 = pd.Series([1, 2], index=list("ab"))
ps3

a    1
b    2
dtype: int64

In [30]:
# Column-wise concat with partially overlapping labels: the default
# join="outer" keeps all of ps1's labels and fills ps3's gaps with NaN.
pd.concat([ps1, ps3], axis="columns")

Unnamed: 0,0,1
a,1,1.0
b,2,2.0
c,3,
d,4,


In [31]:
# join="inner" keeps only the labels present in both Series ('a' and 'b').
pd.concat([ps1, ps3], axis="columns", join="inner")

Unnamed: 0,0,1
a,1,1
b,2,2


In [37]:
# Re-display df1 (defined earlier in this notebook) for reference.
df1

Unnamed: 0,name,points
0,walid,10
1,bora,20
2,hamza,30


In [39]:
# Extra rows with the same columns as df1 ('name', 'points').
df7 = pd.DataFrame(
    {
        "name": ["gri3a", "ma3naoui"],
        "points": [50, 40],
    }
)
df7

Unnamed: 0,name,points
0,gri3a,50
1,ma3naoui,40


In [41]:
# Append df7's rows below df1's. The original row labels are kept as-is,
# so labels 0 and 1 each appear twice in the result.
pd.concat([df1, df7], axis="index")

Unnamed: 0,name,points
0,walid,10
1,bora,20
2,hamza,30
0,gri3a,50
1,ma3naoui,40


In [42]:
# Avoid the duplicated row labels: ignore_index=True discards the original
# labels and renumbers the rows 0..n-1.
pd.concat([df1, df7], axis="index", ignore_index=True)

Unnamed: 0,name,points
0,walid,10
1,bora,20
2,hamza,30
3,gri3a,50
4,ma3naoui,40
