# Pandas DataFrames: Adding Columns and Merging DataFrames

In [1]:
import pandas as pd

In [2]:
# Build the purchases table column by column; each store label indexes one purchase.
df = pd.DataFrame(
    {
        'Name': ['Chris', 'Kevyn', 'Filip'],
        'Item Purchased': ['Sponge', 'Kitty Litter', 'Spoon'],
        'Cost': [22.50, 2.50, 5.00],
    },
    index=['Store 1', 'Store 2', 'Store 3'],
)
df

Unnamed: 0,Cost,Item Purchased,Name
Store 1,22.5,Sponge,Chris
Store 2,2.5,Kitty Litter,Kevyn
Store 3,5.0,Spoon,Filip


## Add a Column

In [3]:
# A list with one entry per row becomes a new column, matched to the rows by position.
df = df.assign(Date=['January 1', 'December 31', 'May 7'])
df

Unnamed: 0,Cost,Item Purchased,Name,Date
Store 1,22.5,Sponge,Chris,January 1
Store 2,2.5,Kitty Litter,Kevyn,December 31
Store 3,5.0,Spoon,Filip,May 7


In [4]:
# A single scalar is broadcast to every row, so all rows get the same value.
df = df.assign(Delivered=True)
df

Unnamed: 0,Cost,Item Purchased,Name,Date,Delivered
Store 1,22.5,Sponge,Chris,January 1,True
Store 2,2.5,Kitty Litter,Kevyn,December 31,True
Store 3,5.0,Spoon,Filip,May 7,True


In [5]:
# What if we only have data for 2 of the 3 rows?
# A list must still supply one entry per row, so the missing one is written as None.
df = df.assign(Feedback=['Positive', None, 'Negative'])
df

Unnamed: 0,Cost,Item Purchased,Name,Date,Delivered,Feedback
Store 1,22.5,Sponge,Chris,January 1,True,Positive
Store 2,2.5,Kitty Litter,Kevyn,December 31,True,
Store 3,5.0,Spoon,Filip,May 7,True,Negative


In [6]:
# Let's remove the store index: the labels move into a regular 'index' column
# and the rows are renumbered 0..n-1.
df = df.reset_index()
df

Unnamed: 0,index,Cost,Item Purchased,Name,Date,Delivered,Feedback
0,Store 1,22.5,Sponge,Chris,January 1,True,Positive
1,Store 2,2.5,Kitty Litter,Kevyn,December 31,True,
2,Store 3,5.0,Spoon,Filip,May 7,True,Negative


In [7]:
# What if we only have data for some rows and DO NOT want to type None or NaN
# for every row we have no data for? Assign a Series: values are aligned on the
# index labels, and any row whose label is absent from the Series becomes NaN.
# Note: this replaces the existing 'Date' column rather than adding a new one.
df['Date'] = pd.Series(['June 5', 'April 9'], index=[0, 2])
df

Unnamed: 0,index,Cost,Item Purchased,Name,Date,Delivered,Feedback
0,Store 1,22.5,Sponge,Chris,June 5,True,Positive
1,Store 2,2.5,Kitty Litter,Kevyn,,True,
2,Store 3,5.0,Spoon,Filip,April 9,True,Negative


## Merge DataFrames

In [8]:
# Staff table: one role per person, keyed by the person's name.
staff_df = pd.DataFrame(
    {
        'Name': ['Kelly', 'Sally', 'James'],
        'Role': ['Director of HR', 'Course Liason', 'Grader'],
    }
).set_index('Name')
staff_df

Unnamed: 0_level_0,Role
Name,Unnamed: 1_level_1
Kelly,Director of HR
Sally,Course Liason
James,Grader


In [9]:
# Student table: one school per person, keyed by the person's name.
student_df = pd.DataFrame(
    {
        'Name': ['Sally', 'Meanna', 'Joanna'],
        'School': ['Business', 'Law', 'Arts'],
    }
).set_index('Name')
student_df

Unnamed: 0_level_0,School
Name,Unnamed: 1_level_1
Sally,Business
Meanna,Law
Joanna,Arts


In [10]:
# Outer join on the shared 'Name' index: every name from either frame is kept,
# and a cell is NaN where that person has no row in the other frame.
staff_df.merge(student_df, how='outer', left_index=True, right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
James,Grader,
Joanna,,Arts
Kelly,Director of HR,
Meanna,,Law
Sally,Course Liason,Business


In [11]:
# Inner join on the 'Name' index: only names present in both frames are kept.
# `how` can also be 'left' or 'right'.
staff_df.merge(student_df, how='inner', left_index=True, right_index=True)

Unnamed: 0_level_0,Role,School
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Sally,Course Liason,Business


In [12]:
# To join on a column instead of the index, first turn the 'Name' index of
# both frames back into an ordinary column.
student_df = student_df.reset_index()
staff_df = staff_df.reset_index()

staff_df


Unnamed: 0,Name,Role
0,Kelly,Director of HR
1,Sally,Course Liason
2,James,Grader


In [13]:
# Show student_df, which now carries 'Name' as a regular column after the index reset
student_df

Unnamed: 0,Name,School
0,Sally,Business
1,Meanna,Law
2,Joanna,Arts


In [14]:
# Join on the 'Name' column instead of the index. The key column has the same
# name in both frames, so a single `on` names it for the left and right side.
student_df.merge(staff_df, how='outer', on='Name')

Unnamed: 0,Name,School,Role
0,Sally,Business,Course Liason
1,Meanna,Law,
2,Joanna,Arts,
3,Kelly,,Director of HR
4,James,,Grader


In [15]:
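# Join on multiple columns: pass lists of column names to left_on / right_on.
# Illustrative only - the frames above have no 'First Name' / 'Last Name' columns, so the call stays commented out.
# pd.merge(student_df,staff_df, how='outer', left_on=['First Name','Last Name'], right_on=['First Name','Last Name'])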