# Combining Datasets

In [1]:
import pandas as pd

In [2]:
# Two small demo frames sharing the key column `col1`;
# only the keys 3, 4 and 5 occur in both of them.
left = pd.DataFrame(dict(
    col1=[1, 2, 3, 4, 5],
    col2=['a', 'b', 'c', 'd', 'e'],
))
right = pd.DataFrame(dict(
    col1=[3, 4, 5, 6, 7],
    col2=['f', 'g', 'h', 'i', 'j'],
))

# Plain-text dump of both frames, one after the other.
print(left, right, sep='\n')

   col1 col2
0     1    a
1     2    b
2     3    c
3     4    d
4     5    e
   col1 col2
0     3    f
1     4    g
2     5    h
3     6    i
4     7    j


## Merge Function

In [3]:
# Inner join (the default): keep only the `col1` keys present in both frames.
left.merge(right, how='inner', on='col1')

Unnamed: 0,col1,col2_x,col2_y
0,3,c,f
1,4,d,g
2,5,e,h


In [4]:
# Left join: keep every row of `left`; keys missing from `right` get empty `col2_y`.
left.merge(right, how='left', on='col1')

Unnamed: 0,col1,col2_x,col2_y
0,1,a,
1,2,b,
2,3,c,f
3,4,d,g
4,5,e,h


In [5]:
# Right join: keep every row of `right`; keys missing from `left` get empty `col2_x`.
left.merge(right, how='right', on='col1')

Unnamed: 0,col1,col2_x,col2_y
0,3,c,f
1,4,d,g
2,5,e,h
3,6,,i
4,7,,j


In [6]:
# Outer join: keep the union of keys from both frames, filling gaps on either side.
left.merge(right, how='outer', on='col1')

Unnamed: 0,col1,col2_x,col2_y
0,1,a,
1,2,b,
2,3,c,f
3,4,d,g
4,5,e,h
5,6,,i
6,7,,j


In [7]:
# First toy account table, used by the concatenation examples.
df1 = pd.DataFrame(dict(
    account_id=[1, 2, 3, 11382],
    gender=['female', 'male', 'female', 'male'],
    age=[55, 25, 29, 39],
))
df1

Unnamed: 0,account_id,gender,age
0,1,female,55
1,2,male,25
2,3,female,29
3,11382,male,39


In [8]:
# Second toy account table with the same columns as `df1` but different accounts.
df2 = pd.DataFrame(dict(
    account_id=[4, 5, 6, 7],
    gender=['female', 'male', 'female', 'male'],
    age=[19, 28, 14, 15],
))
df2

Unnamed: 0,account_id,gender,age
0,4,female,19
1,5,male,28
2,6,female,14
3,7,male,15


## Concatenating

In [9]:
# Stack `df2` below `df1`; each frame keeps its own 0..3 row labels, so labels repeat.
pd.concat(objs=[df1, df2], axis='index')

Unnamed: 0,account_id,gender,age
0,1,female,55
1,2,male,25
2,3,female,29
3,11382,male,39
0,4,female,19
1,5,male,28
2,6,female,14
3,7,male,15


In [10]:
# Place the two frames side by side, aligned on their row labels.
pd.concat(objs=[df1, df2], axis='columns')

Unnamed: 0,account_id,gender,age,account_id.1,gender.1,age.1
0,1,female,55,4,female,19
1,2,male,25,5,male,28
2,3,female,29,6,female,14
3,11382,male,39,7,male,15


In [12]:
# Stack the frames row-wise and discard the original labels in favour of a fresh 0..n-1 index.
pd.concat(objs=[df1, df2], axis='index', ignore_index=True)

Unnamed: 0,account_id,gender,age
0,1,female,55
1,2,male,25
2,3,female,29
3,11382,male,39
4,4,female,19
5,5,male,28
6,6,female,14
7,7,male,15


In [13]:
# Load the three pickled hero tables (paths are relative to the working directory).
# NOTE(review): unpickling can execute arbitrary code — presumably these are
# trusted local files; confirm their origin before running.
hero_power = pd.read_pickle(filepath_or_buffer='Hero Power.pkl')
marvel_hero = pd.read_pickle(filepath_or_buffer='Marvel Hero.pkl')
dc_hero = pd.read_pickle(filepath_or_buffer='DC Hero.pkl')

In [14]:
# Preview the first row of the powers table to see its columns.
hero_power.head(1)

Unnamed: 0,hero_names,Agility,Accelerated Healing,Lantern Power Ring,Dimensional Awareness,Cold Resistance,Durability,Stealth,Energy Absorption,Flight,...,Web Creation,Reality Warping,Odin Force,Symbiote Costume,Speed Force,Phoenix Force,Molecular Dissipation,Vision - Cryo,Omnipresent,Omniscient
0,3-D Man,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [15]:
# Preview the first row of the Marvel table.
marvel_hero.head(1)

Unnamed: 0,name,Gender,Eye color,Race,Hair color,Height,Publisher,Alignment,Weight
0,A-Bomb,Male,yellow,Human,No Hair,203,Marvel Comics,good,441


In [16]:
# Preview the first row of the DC table.
dc_hero.head(1)

Unnamed: 0,name,Gender,Eye color,Race,Hair color,Height,Publisher,Alignment,Weight
0,Abin Sur,Male,blue,Ungaran,No Hair,185,DC Comics,good,90.0


In [19]:
# (rows, columns) of the Marvel table.
marvel_hero.shape

(388, 9)

In [20]:
# (rows, columns) of the DC table.
dc_hero.shape

(215, 9)

In [21]:
# Expected number of rows once both publishers' tables are stacked.
# Derived from the frames themselves instead of hand-copying the `.shape`
# outputs, so the figure stays correct if the input files change.
len(marvel_hero) + len(dc_hero)

603

In [22]:
# Stack the Marvel and DC tables row-wise and renumber the index from 0.
hero_info = pd.concat(
    objs=[marvel_hero, dc_hero],
    axis='index',
    ignore_index=True,
)

In [24]:
# Summarize the combined table: per-column dtype and non-null count.
hero_info.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 603 entries, 0 to 602
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   name        603 non-null    string  
 1   Gender      603 non-null    category
 2   Eye color   603 non-null    object  
 3   Race        603 non-null    string  
 4   Hair color  603 non-null    object  
 5   Height      603 non-null    int64   
 6   Publisher   603 non-null    string  
 7   Alignment   603 non-null    category
 8   Weight      532 non-null    float64 
dtypes: category(2), float64(1), int64(1), object(2), string(3)
memory usage: 34.6+ KB


In [25]:
# Preview the first row of the combined Marvel + DC table.
hero_info.head(1)

Unnamed: 0,name,Gender,Eye color,Race,Hair color,Height,Publisher,Alignment,Weight
0,A-Bomb,Male,yellow,Human,No Hair,203,Marvel Comics,good,441.0


In [26]:
# Show the powers table's first row again; its `hero_names` column is the
# key matched against `name` when the two tables are merged.
hero_power.head(1)

Unnamed: 0,hero_names,Agility,Accelerated Healing,Lantern Power Ring,Dimensional Awareness,Cold Resistance,Durability,Stealth,Energy Absorption,Flight,...,Web Creation,Reality Warping,Odin Force,Symbiote Costume,Speed Force,Phoenix Force,Molecular Dissipation,Vision - Cryo,Omnipresent,Omniscient
0,3-D Man,True,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [27]:
# Inner-join hero attributes with their powers, matching `name` against `hero_names`.
# Both key columns are kept in the result because their names differ.
heroes = hero_info.merge(
    hero_power,
    how='inner',
    left_on='name',
    right_on='hero_names',
)

In [28]:
# Display the merged table (the rendered output elides the middle rows and columns).
heroes

Unnamed: 0,name,Gender,Eye color,Race,Hair color,Height,Publisher,Alignment,Weight,hero_names,...,Web Creation,Reality Warping,Odin Force,Symbiote Costume,Speed Force,Phoenix Force,Molecular Dissipation,Vision - Cryo,Omnipresent,Omniscient
0,A-Bomb,Male,yellow,Human,No Hair,203,Marvel Comics,good,441.0,A-Bomb,...,False,False,False,False,False,False,False,False,False,False
1,Abomination,Male,green,Human / Radiation,No Hair,203,Marvel Comics,bad,441.0,Abomination,...,False,False,False,False,False,False,False,False,False,False
2,Abraxas,Male,blue,Cosmic Entity,Black,-99,Marvel Comics,bad,-99.0,Abraxas,...,False,False,False,False,False,False,False,False,False,False
3,Absorbing Man,Male,blue,Human,No Hair,193,Marvel Comics,bad,122.0,Absorbing Man,...,False,False,False,False,False,False,False,False,False,False
4,Agent Bob,Male,brown,Human,Brown,178,Marvel Comics,good,81.0,Agent Bob,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
532,Wildfire,Male,-,-,-,-99,DC Comics,good,,Wildfire,...,False,False,False,False,False,False,False,False,False,False
533,Wonder Girl,Female,blue,Demi-God,Blond,165,DC Comics,good,51.0,Wonder Girl,...,False,False,False,False,False,False,False,False,False,False
534,Wonder Woman,Female,blue,Amazon,Black,183,DC Comics,good,74.0,Wonder Woman,...,False,False,False,False,False,False,False,False,False,False
535,Zatanna,Female,blue,Human,Black,170,DC Comics,good,57.0,Zatanna,...,False,False,False,False,False,False,False,False,False,False


In [29]:
# (rows, columns) of the merged table.
heroes.shape

(537, 177)