# Applying Advanced Transformations
* James Belk
* 03/30/2023

## Imports

In [1]:
## Standard Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings(action='ignore')
## Importing the OS and JSON Modules
import os,json

## Data

In [2]:
powers = pd.read_csv('Data/superhero_info - superhero_info.csv')
info = pd.read_csv('Data/superhero_powers - superhero_powers.csv')

In [3]:
powers.head(2)

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}"


In [4]:
info.head(2)

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."


## Clean Superhero Powers

### Split Hero|Publisher

In [5]:
powers[['Hero', 'Publisher']] = powers['Hero|Publisher'].str.split('|', expand = True)
powers.head(2)

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics


In [6]:
powers = powers.drop(columns=['Hero|Publisher'])
powers.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics


### Split Measurements

In [7]:
measure = powers.loc[0,'Measurements']
print(type(measure))
measure

<class 'str'>


"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"

In [8]:
measure = measure.replace("'", '"')
measure

'{"Height": "203.0 cm", "Weight": "441.0 kg"}'

In [9]:
fixed_measure = json.loads(measure)
print(type(fixed_measure))
fixed_measure

<class 'dict'>


{'Height': '203.0 cm', 'Weight': '441.0 kg'}

In [10]:
powers['Measurements'] = powers['Measurements'].str.replace("'", '"')

powers['Measurements'] = powers['Measurements'].apply(json.loads)
powers['Measurements'].head(2)

0    {'Height': '203.0 cm', 'Weight': '441.0 kg'}
1     {'Height': '191.0 cm', 'Weight': '65.0 kg'}
Name: Measurements, dtype: object

In [11]:
hw = powers['Measurements'].apply(pd.Series)
hw.head(2)

Unnamed: 0,Height,Weight
0,203.0 cm,441.0 kg
1,191.0 cm,65.0 kg


In [12]:
hw[['Height', ' cm']] = hw['Height'].str.split(' cm', expand = True)
hw[['Weight', ' kg']] = hw['Weight'].str.split(' kg', expand = True)
hw.head(2)

Unnamed: 0,Height,Weight,cm,kg
0,203.0,441.0,,
1,191.0,65.0,,


In [13]:
hw = hw.drop(columns=[' cm', ' kg'])

In [14]:
df = pd.concat((powers, hw), axis = 1)
df.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics,203.0,441.0
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics,191.0,65.0


In [15]:
df = df.drop(columns = ['Measurements'])

In [16]:
df.head(2)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0,441.0
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0,65.0


## Clean Superhero Info 

In [17]:
power = info.loc[5,'Powers']
print(type(power))
power

<class 'str'>


'Dimensional Awareness,Flight,Intelligence,Super Strength,Size Changing,Super Speed,Teleportation,Magic,Dimensional Travel,Immortality,Invulnerability,Molecular Manipulation,Energy Manipulation,Power Cosmic'

In [18]:
info['Powers'] = info['Powers'].str.split(',')
info.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"[Agility, Super Strength, Stamina, Super Speed]"
1,A-Bomb,"[Accelerated Healing, Durability, Longevity, S..."
2,Abe Sapien,"[Agility, Accelerated Healing, Cold Resistance..."
3,Abin Sur,[Lantern Power Ring]
4,Abomination,"[Accelerated Healing, Intelligence, Super Stre..."


In [19]:
info['Powers'].value_counts()

[Intelligence]                                                                                                                                                                                                                                                                          8
[Durability, Super Strength]                                                                                                                                                                                                                                                            5
[Agility, Stealth, Marksmanship, Weapons Master, Stamina]                                                                                                                                                                                                                               4
[Marksmanship]                                                                                                                                            

In [20]:
exploded = info.explode('Powers')
exploded[['Powers']].head()

Unnamed: 0,Powers
0,Agility
0,Super Strength
0,Stamina
0,Super Speed
1,Accelerated Healing


In [21]:
col_to_make = exploded['Powers'].dropna().unique()
col_to_make

array(['Agility', 'Super Strength', 'Stamina', 'Super Speed',
       'Accelerated Healing', 'Durability', 'Longevity', 'Camouflage',
       'Self-Sustenance', 'Cold Resistance', 'Underwater breathing',
       'Marksmanship', 'Weapons Master', 'Intelligence', 'Telepathy',
       'Immortality', 'Reflexes', 'Enhanced Sight', 'Sub-Mariner',
       'Lantern Power Ring', 'Invulnerability', 'Animation',
       'Super Breath', 'Dimensional Awareness', 'Flight', 'Size Changing',
       'Teleportation', 'Magic', 'Dimensional Travel',
       'Molecular Manipulation', 'Energy Manipulation', 'Power Cosmic',
       'Energy Absorption', 'Elemental Transmogrification',
       'Fire Resistance', 'Natural Armor', 'Heat Resistance',
       'Matter Absorption', 'Regeneration', 'Stealth', 'Power Suit',
       'Energy Blasts', 'Energy Beams', 'Heat Generation', 'Danger Sense',
       'Phasing', 'Force Fields', 'Hypnokinesis', 'Invisibility',
       'Enhanced Senses', 'Jump', 'Shapeshifting', 'Elasticity',
 

In [22]:
for col in col_to_make:
    info[col] = info['Powers'].str.contains(col, regex = False)
info.head()

Unnamed: 0,hero_names,Powers,Agility,Super Strength,Stamina,Super Speed,Accelerated Healing,Durability,Longevity,Camouflage,...,Weather Control,Omnipresent,Omniscient,Hair Manipulation,Nova Force,Odin Force,Phoenix Force,Intuitive aptitude,Melting,Changing Armor
0,3-D Man,"[Agility, Super Strength, Stamina, Super Speed]",True,True,True,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,A-Bomb,"[Accelerated Healing, Durability, Longevity, S...",False,True,True,False,True,True,True,True,...,False,False,False,False,False,False,False,False,False,False
2,Abe Sapien,"[Agility, Accelerated Healing, Cold Resistance...",True,True,True,False,True,True,True,False,...,False,False,False,False,False,False,False,False,False,False
3,Abin Sur,[Lantern Power Ring],False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Abomination,"[Accelerated Healing, Intelligence, Super Stre...",False,True,True,True,True,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [23]:
df2 = info.drop(columns = ['Powers'])
df2.head()

Unnamed: 0,hero_names,Agility,Super Strength,Stamina,Super Speed,Accelerated Healing,Durability,Longevity,Camouflage,Self-Sustenance,...,Weather Control,Omnipresent,Omniscient,Hair Manipulation,Nova Force,Odin Force,Phoenix Force,Intuitive aptitude,Melting,Changing Armor
0,3-D Man,True,True,True,True,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,A-Bomb,False,True,True,False,True,True,True,True,True,...,False,False,False,False,False,False,False,False,False,False
2,Abe Sapien,True,True,True,False,True,True,True,False,False,...,False,False,False,False,False,False,False,False,False,False
3,Abin Sur,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,Abomination,False,True,True,True,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


## Merge Datasets

In [24]:
merged = pd.merge(df, df2, left_on='Hero', right_on ='hero_names')
merged.head(3)

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height,Weight,...,Weather Control,Omnipresent,Omniscient,Hair Manipulation,Nova Force,Odin Force,Phoenix Force,Intuitive aptitude,Melting,Changing Armor
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0,441.0,...,False,False,False,False,False,False,False,False,False,False
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0,65.0,...,False,False,False,False,False,False,False,False,False,False
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185.0,90.0,...,False,False,False,False,False,False,False,False,False,False


In [25]:
merged = merged.drop(columns=['hero_names'])

# Analysis

## Compare the average weight of super powers who have Super Speed to those who do not.

In [38]:
merged['Super Speed'] = merged['Super Speed'].replace(to_replace =(True, False), value=['Speed', 'No Speed'])

In [42]:
merged['Weight'] = pd.to_numeric(merged['Weight'])

In [43]:
merged.groupby('Super Speed')['Weight'].mean()

Super Speed
No Speed    101.773585
Speed       129.404040
Name: Weight, dtype: float64

## What is the average height of heroes for each publisher?

In [45]:
merged['Height'] = pd.to_numeric(merged['Height'])

In [46]:
merged.groupby('Publisher')['Height'].mean()

Publisher
DC Comics            181.923913
Dark Horse Comics    176.909091
George Lucas         159.600000
Image Comics         211.000000
Marvel Comics        191.546128
Shueisha             171.500000
Star Trek            181.500000
Team Epic TV         180.750000
Unknown              178.000000
Name: Height, dtype: float64