In [2]:
import pandas as pd

# Staff roster: one row per employee, indexed by the person's name.
staff = pd.DataFrame(
    [('Meziane', 'Director of RH'),
     ('Mohamed', 'Supervison'),
     ('Lissa', 'Secretariat')],
    columns=['Name', 'Role'],
).set_index('Name')

# Student roster, indexed by name in the same way so that both frames can be
# joined on their index.
students = pd.DataFrame(
    [('Meziane', 'Buisness'),
     ('Alina', 'Medecine'),
     ('James', 'Fashion')],
    columns=['Name', 'School'],
).set_index('Name')

# Plain-text dump of both frames.
print(staff)
print(students)

                   Role
Name                   
Meziane  Director of RH
Mohamed      Supervison
Lissa       Secretariat
           School
Name             
Meziane  Buisness
Alina    Medecine
James     Fashion


In [3]:
# Combine staff and students into a single table. Both frames are indexed by
# 'Name', so the join is done index-to-index. An outer join keeps every name
# that appears in either frame.
staff.merge(students, how='outer', left_index=True, right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alina,,Medecine
James,,Fashion
Lissa,Secretariat,
Meziane,Director of RH,Buisness
Mohamed,Supervison,


In [4]:
# Inner join on the index: keep only the names present in both frames.
staff.merge(students, how='inner', left_index=True, right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Meziane,Director of RH,Buisness


In [5]:
# List every staff member, whether or not they are also a student, and attach
# their school when they are one. This is a left join: every row of the left
# frame (staff) is kept.
staff.merge(students, how='left', left_index=True, right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Meziane,Director of RH,Buisness
Mohamed,Supervison,
Lissa,Secretariat,


In [13]:
# Now list every student, whether or not they are also staff, and attach their
# role when they are. Same left join with the operands swapped, so students is
# the frame whose rows are all kept.
# The join is index-to-index, so it relies on both frames being indexed by 'Name'.
students.merge(staff, how='left', left_index=True, right_index=True)

Unnamed: 0_level_0,School,Role
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Meziane,Buisness,Director of RH
Alina,Medecine,
James,Fashion,


In [7]:
# merge() does not have to join on the index. The `on` parameter names a
# column that both DataFrames share, and the join is done on that column.

# Expose 'Name' as an ordinary column. The results are stored under new names
# so that the 'Name'-indexed `staff` and `students` frames stay intact: the
# index-based merge cells keep working when re-run, and re-running this cell
# does not stack an extra 'index' column onto the frames.
staff_cols = staff.reset_index()
students_cols = students.reset_index()

# Right join on the shared 'Name' column: every student is kept, with the
# staff role filled in where one exists.
pd.merge(staff_cols, students_cols, how='right', on='Name')

Unnamed: 0,Name,Role,School
0,Meziane,Director of RH,Buisness
1,Alina,,Medecine
2,James,,Fashion


In [8]:
# What happens when the two DataFrames have a conflicting column? Build new
# staff and student frames that both carry a 'Location' column.
staff_df = pd.DataFrame(
    [('Kelly', 'Director of HR', 'State Street'),
     ('Sally', 'Course liasion', 'Washington Avenue'),
     ('James', 'Grader', 'Washington Avenue')],
    columns=['Name', 'Role', 'Location'],
)
student_df = pd.DataFrame(
    [('James', 'Business', '1024 Billiard Avenue'),
     ('Mike', 'Law', 'Fraternity House #22'),
     ('Sally', 'Engineering', '512 Wilson Crescent')],
    columns=['Name', 'School', 'Location'],
)

# merge() keeps both copies of a non-key column that exists in both frames.
# The copy from the left frame gets the suffix _x and the copy from the right
# frame gets the suffix _y.

# Example: left join on 'Name' yields Location_x (staff) and Location_y (student).
staff_df.merge(student_df, how='left', on='Name')

Unnamed: 0,Name,Role,Location_x,School,Location_y
0,Kelly,Director of HR,State Street,,
1,Sally,Course liasion,Washington Avenue,Engineering,512 Wilson Crescent
2,James,Grader,Washington Avenue,Business,1024 Billiard Avenue


In [25]:
# To join on more than one column, pass a list of column names to `on`.
# Here a person is identified by first name and last name together, so a row
# matches only when both values agree.
staff_df = pd.DataFrame(
    [('Kelly', 'Desjardins', 'Director of HR'),
     ('Sally', 'Brooks', 'Course liasion'),
     ('James', 'Wilde', 'Grader')],
    columns=['First Name', 'Last Name', 'Role'],
)
student_df = pd.DataFrame(
    [('James', 'Hammond', 'Business'),
     ('Mike', 'Smith', 'Law'),
     ('Sally', 'Brooks', 'Engineering')],
    columns=['First Name', 'Last Name', 'School'],
)

# Right join: every student row is kept.
staff_df.merge(student_df, how='right', on=['First Name', 'Last Name'])

Unnamed: 0,First Name,Last Name,Role,School
0,Sally,Brooks,Course liasion,Engineering
1,James,Hammond,,Business
2,Mike,Smith,,Law


In [10]:
# Concatenation: stack DataFrames that share the same columns on top of each other.
name_columns = ['First Name', 'Last Name']
staff_df = pd.DataFrame(
    [('Kelly', 'Desjardins'),
     ('Sally', 'Brooks'),
     ('James', 'Wilde')],
    columns=name_columns,
)
student_df = pd.DataFrame(
    [('James', 'Hammond'),
     ('Mike', 'Smith'),
     ('Sally', 'Brooks')],
    columns=name_columns,
)

# pd.concat takes the DataFrames to stack as a list.
frames = [staff_df, student_df]
pd.concat(frames)


Unnamed: 0,First Name,Last Name
0,Kelly,Desjardins
1,Sally,Brooks
2,James,Wilde
0,James,Hammond
1,Mike,Smith
2,Sally,Brooks


In [11]:
# After a plain concat the staff rows and the student rows can no longer be
# told apart. The `keys` parameter labels each input frame and adds that label
# as an outer index level of the result.
source_labels = ['Staff', 'Students']
pd.concat(frames, keys=source_labels)

Unnamed: 0,Unnamed: 1,First Name,Last Name
Staff,0,Kelly,Desjardins
Staff,1,Sally,Brooks
Staff,2,James,Wilde
Students,0,James,Hammond
Students,1,Mike,Smith
Students,2,Sally,Brooks


In [20]:

# Small numeric table with three columns (P2010, P2011, P2012) and three rows.
# It is built directly from its column values. Seeding the frame with
# placeholder cells and then overwriting every column gave the same result but
# obscured what the data is.
df_quiz = pd.DataFrame({
    'P2010': [1, 22, 3],
    'P2011': [11, 3, 3],
    'P2012': [13, 21, 23],
})
df_quiz

Unnamed: 0,P2010,P2011,P2012
0,1,11,13
1,22,3,21
2,3,3,23


In [23]:
import numpy as np
# Columns that take part in the average. They get their own name so that
# `frames`, the list of DataFrames built for pd.concat, is not overwritten with
# a list of strings and the concat cells can still be re-run afterwards.
value_cols = ['P2010', 'P2011', 'P2012']

# Row-wise mean of the selected columns, using the vectorised DataFrame.mean
# instead of calling a Python lambda once per row.
df_quiz['AVG'] = df_quiz[value_cols].mean(axis=1)

# Keep only the computed average in the result.
result_df = df_quiz.drop(columns=value_cols)
result_df

Unnamed: 0,AVG
0,8.333333
1,15.333333
2,9.666667
