# Applying Advanced Transformations (Core)

## Imports

In [148]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [149]:
import os, time,json
import tmdbsimple as tmdb 

In [150]:
# Load the hero attribute table.
# A forward slash is used as the separator: it works on Windows, macOS and
# Linux, and avoids the invalid "\s" escape sequence that a backslash inside a
# normal (non-raw) string literal produces.
superhero_info = pd.read_csv('Data/superhero_info - superhero_info.csv')
superhero_info.head()

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}"
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}"
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}"


In [151]:
# Load the hero -> comma-separated powers table.
# A forward slash is used as the separator: it works on Windows, macOS and
# Linux, and avoids the invalid "\s" escape sequence that a backslash inside a
# normal (non-raw) string literal produces.
superhero_powers = pd.read_csv('Data/superhero_powers - superhero_powers.csv')
superhero_powers.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


In [152]:
# One indicator (0/1) column per power, obtained by splitting the
# comma-separated 'Powers' string of every hero.
powers_dummies = superhero_powers['Powers'].str.get_dummies(sep=',')

# Attach the indicator columns next to the original columns, then discard the
# raw 'Powers' text column, which the indicators replace.
final_superhero_powers = (
    pd.concat([superhero_powers, powers_dummies], axis=1)
    .drop(columns=['Powers'])
)

# Preview the first rows of the encoded frame.
print(final_superhero_powers.head())

    hero_names  Accelerated Healing  Adaptation  Agility  Animal Attributes  \
0      3-D Man                    0           0        1                  0   
1       A-Bomb                    1           0        0                  0   
2   Abe Sapien                    1           0        1                  0   
3     Abin Sur                    0           0        0                  0   
4  Abomination                    1           0        0                  0   

   Animal Control  Animal Oriented Powers  Animation  Anti-Gravity  \
0               0                       0          0             0   
1               0                       0          0             0   
2               0                       0          0             0   
3               0                       0          0             0   
4               0                       0          1             0   

   Astral Projection  ...  Vision - Thermal  Vision - X-Ray  Vitakinesis  \
0                  0  ...   

In [153]:
# Same encoding as the previous cell, written with the steps named separately.

# Everything except the raw comma-separated 'Powers' text.
powers_without_text = superhero_powers.drop(columns='Powers')

# One 0/1 indicator column per individual power.
powers_encoded = superhero_powers['Powers'].str.get_dummies(sep=',')

# Place the indicator columns to the right of the remaining columns.
final_superhero_powers = pd.concat([powers_without_text, powers_encoded], axis=1)

print(final_superhero_powers.head())

    hero_names  Accelerated Healing  Adaptation  Agility  Animal Attributes  \
0      3-D Man                    0           0        1                  0   
1       A-Bomb                    1           0        0                  0   
2   Abe Sapien                    1           0        1                  0   
3     Abin Sur                    0           0        0                  0   
4  Abomination                    1           0        0                  0   

   Animal Control  Animal Oriented Powers  Animation  Anti-Gravity  \
0               0                       0          0             0   
1               0                       0          0             0   
2               0                       0          0             0   
3               0                       0          0             0   
4               0                       0          1             0   

   Astral Projection  ...  Vision - Thermal  Vision - X-Ray  Vitakinesis  \
0                  0  ...   

In [154]:
# Exploratory check: expanding each 'Hero|Publisher' value with pd.Series.
# Each value is one string, so this yields a single column (0) and does not
# separate hero from publisher.
hero_publisher_raw = superhero_info['Hero|Publisher']
hero_publisher_raw.apply(pd.Series)

Unnamed: 0,0
0,A-Bomb|Marvel Comics
1,Abe Sapien|Dark Horse Comics
2,Abin Sur|DC Comics
3,Abomination|Marvel Comics
4,Absorbing Man|Marvel Comics
...,...
458,Yellowjacket|Marvel Comics
459,Yellowjacket II|Marvel Comics
460,Yoda|George Lucas
461,Zatanna|DC Comics


In [155]:
# Split Hero and Publisher into two separate columns (preview only; the
# result is displayed, not stored).
superhero_info['Hero|Publisher'].str.split(pat='|', expand=True)

Unnamed: 0,0,1
0,A-Bomb,Marvel Comics
1,Abe Sapien,Dark Horse Comics
2,Abin Sur,DC Comics
3,Abomination,Marvel Comics
4,Absorbing Man,Marvel Comics
...,...,...
458,Yellowjacket,Marvel Comics
459,Yellowjacket II,Marvel Comics
460,Yoda,George Lucas
461,Zatanna,DC Comics


In [156]:
# Split the combined 'Hero|Publisher' text on the pipe character and store the
# two resulting pieces as new 'Hero' and 'Publisher' columns.
hero_publisher_parts = superhero_info['Hero|Publisher'].str.split('|', expand=True)
superhero_info[['Hero', 'Publisher']] = hero_publisher_parts
superhero_info.head()

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics


In [157]:
# The combined column is redundant now that 'Hero' and 'Publisher' exist.
superhero_info = superhero_info.drop('Hero|Publisher', axis='columns')
superhero_info.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics
2,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics
3,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics
4,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics


In [158]:
# Exploratory check: expanding each 'Measurements' value with pd.Series.
# The displayed result has a single column (0) holding the dict-like text,
# i.e. Height and Weight are not separated by this call.
measurements_raw = superhero_info['Measurements']
measurements_raw.apply(pd.Series)

Unnamed: 0,0
0,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
1,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}"
2,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}"
3,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
4,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}"
...,...
458,"{'Height': '183.0 cm', 'Weight': '83.0 kg'}"
459,"{'Height': '165.0 cm', 'Weight': '52.0 kg'}"
460,"{'Height': '66.0 cm', 'Weight': '17.0 kg'}"
461,"{'Height': '170.0 cm', 'Weight': '57.0 kg'}"


In [159]:
# Extract Height and Weight from the 'Measurements' text, which looks like
# "{'Height': '203.0 cm', 'Weight': '441.0 kg'}".
#
# The patterns are raw strings so that "\d" and "\." reach the regex engine
# unchanged (in a normal string literal "\d" is an invalid escape sequence).
# The decimal point is escaped so it only matches a literal ".", and the
# fractional part is optional so whole-number values such as '66 cm' are
# captured too. Values that do not match (e.g. negative numbers) become NaN.
height_pattern = r"'Height': '(\d+(?:\.\d+)?) cm'"
weight_pattern = r"'Weight': '(\d+(?:\.\d+)?) kg'"

# expand=False returns the single capture group directly as a Series.
superhero_info['Height'] = (
    superhero_info['Measurements'].str.extract(height_pattern, expand=False).astype(float)
)
superhero_info['Weight'] = (
    superhero_info['Measurements'].str.extract(weight_pattern, expand=False).astype(float)
)

# Drop the original Measurements column; its content now lives in the numeric
# 'Height' (cm) and 'Weight' (kg) columns.
superhero_info = superhero_info.drop(columns=['Measurements'])
superhero_info.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height,Weight
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0,441.0
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0,65.0
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185.0,90.0
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203.0,441.0
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193.0,122.0


In [162]:
# Inspect the column labels of both frames before merging them, to see which
# column can serve as the join key.
info_columns = superhero_info.columns
powers_columns = final_superhero_powers.columns
print("Columns in superhero_info:", info_columns)
print("Columns in encoded_powers:", powers_columns)

Columns in superhero_info: Index(['Gender', 'Race', 'Alignment', 'Hair color', 'Eye color', 'Skin color',
       'Hero', 'Publisher', 'Height', 'Weight'],
      dtype='object')
Columns in encoded_powers: Index(['hero_names', 'Accelerated Healing', 'Adaptation', 'Agility',
       'Animal Attributes', 'Animal Control', 'Animal Oriented Powers',
       'Animation', 'Anti-Gravity', 'Astral Projection',
       ...
       'Vision - Thermal', 'Vision - X-Ray', 'Vitakinesis', 'Wallcrawling',
       'Water Control', 'Weapon-based Powers', 'Weapons Master',
       'Weather Control', 'Web Creation', 'Wind Control'],
      dtype='object', length=168)


In [164]:
# Give the hero-name column the same label ('Hero') that superhero_info uses,
# so both frames share one key column name.
final_superhero_powers = final_superhero_powers.rename({'hero_names': 'Hero'}, axis='columns')

In [165]:
# Inner-join hero attributes with the encoded powers on the shared 'Hero'
# column; heroes missing from either frame are left out of the result.
final_df = superhero_info.merge(final_superhero_powers, on='Hero', how='inner')


In [166]:
# Row selectors for heroes with / without the 'Super Speed' indicator set.
has_super_speed = final_df['Super Speed'] == 1
lacks_super_speed = final_df['Super Speed'] == 0

# Mean weight within each group (missing weights are skipped by mean()).
average_weight_with_superspeed = final_df.loc[has_super_speed, 'Weight'].mean()
average_weight_without_superspeed = final_df.loc[lacks_super_speed, 'Weight'].mean()

print(f"Average weight with Super Speed: {average_weight_with_superspeed:.2f} kg")
print(f"Average weight without Super Speed: {average_weight_without_superspeed:.2f} kg")

Average weight with Super Speed: 129.40 kg
Average weight without Super Speed: 101.77 kg


In [167]:
# Mean hero height for every publisher, indexed by publisher name.
height_by_publisher = final_df.groupby('Publisher')['Height']
average_height_by_publisher = height_by_publisher.agg('mean')

print(average_height_by_publisher)

Publisher
DC Comics            181.923913
Dark Horse Comics    176.909091
George Lucas         159.600000
Image Comics         211.000000
Marvel Comics        191.546128
Shueisha             171.500000
Star Trek            181.500000
Team Epic TV         180.750000
Unknown              178.000000
Name: Height, dtype: float64


## Compare the average weight of superheroes who have Super Speed to those who do not

Average weight with Super Speed: 129.40 kg

Average weight without Super Speed: 101.77 kg

## What is the average height of heroes for each publisher?

DC Comics            181.923913

Dark Horse Comics    176.909091

George Lucas         159.600000

Image Comics         211.000000

Marvel Comics        191.546128

Shueisha             171.500000

Star Trek            181.500000

Team Epic TV         180.750000

Unknown              178.000000
