In [1]:
import numpy as np

In [2]:
!pip install pandas
!pip install requests
!pip install Pillow



In [3]:
import requests
import pandas as pd

## Data Import

In [4]:
url = 'https://cdn.jsdelivr.net/gh/akabab/superhero-api@0.3.0/api/all.json'

response = requests.get(url)

all_superheroes_data = response.json()
df = pd.DataFrame(all_superheroes_data)

In [5]:
df = pd.DataFrame(all_superheroes_data)
    
# Split columns containing dictionaries into separate columns
# Powerstats
df_powerstats = pd.json_normalize(df['powerstats'])
df_powerstats.columns = [f'powerstats_{col}' for col in df_powerstats.columns]
    
# Appearance
df_appearance = pd.json_normalize(df['appearance'])
df_appearance.columns = [f'appearance_{col}' for col in df_appearance.columns]
    
# Biography
df_biography = pd.json_normalize(df['biography'])
df_biography.columns = [f'biography_{col}' for col in df_biography.columns]
    
# Work
df_work = pd.json_normalize(df['work'])
df_work.columns = [f'work_{col}' for col in df_work.columns]
    
# Connections
df_connections = pd.json_normalize(df['connections'])
df_connections.columns = [f'connections_{col}' for col in df_connections.columns]
    
# Images
df_images = pd.json_normalize(df['images'])
df_images.columns = [f'images_{col}' for col in df_images.columns]
    
# Concatenate all DataFrames
df = pd.concat([df, df_powerstats, df_appearance, df_biography, df_work, df_connections, df_images], axis=1)
    
# Drop original columns containing dictionaries
df.drop(['powerstats', 'appearance', 'biography', 'work', 'connections', 'images'], axis=1, inplace=True)

## Data Cleaning

In [6]:
#Summary
#Drop Columns
#Check NaN Values
#replace NaN values with unknown
#Check Duplicates

In [7]:
df.drop(['images_xs', 'images_sm', 'images_md'], axis=1, inplace=True)

In [8]:
df_cleaned = df.drop(['slug', 'connections_relatives', 'biography_aliases', 'biography_placeOfBirth','appearance_hairColor', 'appearance_eyeColor', 'biography_alterEgos'], axis=1)

In [9]:
df_cleaned['biography_alignment'].values

array(['good', 'good', 'good', 'bad', 'bad', 'bad', 'good', 'good',
       'good', 'good', 'bad', 'bad', 'good', 'bad', 'good', 'bad', 'bad',
       'good', 'good', 'good', 'good', 'good', 'bad', 'good', 'good',
       'bad', '-', 'bad', 'good', 'good', 'good', 'good', 'good', 'bad',
       'good', 'good', 'good', 'good', 'good', 'bad', 'good', 'good',
       'good', 'bad', 'good', 'bad', 'good', 'good', 'good', 'good',
       'good', 'good', 'good', 'good', 'good', 'good', 'good', 'good',
       'good', 'good', 'good', 'bad', 'good', 'bad', 'good', 'good',
       'good', 'good', 'neutral', 'bad', 'good', 'good', 'good', 'good',
       'neutral', 'good', 'good', 'bad', 'bad', 'good', 'good', 'bad',
       'bad', '-', 'good', 'good', 'good', 'bad', 'bad', 'bad', 'good',
       'good', 'bad', 'good', 'good', 'bad', 'good', '-', 'good', 'bad',
       'good', 'good', 'good', 'bad', 'good', 'good', 'good', 'good',
       'good', 'neutral', 'good', 'good', 'good', 'good', 'good', 'bad',
    

In [10]:
df_cleaned.columns

Index(['id', 'name', 'powerstats_intelligence', 'powerstats_strength',
       'powerstats_speed', 'powerstats_durability', 'powerstats_power',
       'powerstats_combat', 'appearance_gender', 'appearance_race',
       'appearance_height', 'appearance_weight', 'biography_fullName',
       'biography_firstAppearance', 'biography_publisher',
       'biography_alignment', 'work_occupation', 'work_base',
       'connections_groupAffiliation', 'images_lg'],
      dtype='object')

In [11]:
unique_values_per_column = {}

for col in df_cleaned.columns:
    # Check if the column contains lists
    if df_cleaned[col].apply(type).eq(list).any():
        # Flatten the lists within the column
        flattened_values = [item for sublist in df_cleaned[col] for item in sublist]
        # Get unique values after flattening
        unique_values_per_column[col] = set(flattened_values)
    else:
        # Get unique values for non-list columns
        unique_values_per_column[col] = df_cleaned[col].unique()

unique_values_per_column

{'id': array([  1,   2,   3,   4,   5,   6,   7,   8,  10,  11,  12,  13,  14,
         15,  17,  18,  20,  23,  24,  25,  26,  28,  29,  30,  31,  32,
         34,  35,  36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  48,
         49,  52,  53,  56,  57,  58,  60,  61,  62,  63,  66,  68,  69,
         70,  71,  72,  73,  75,  76,  78,  79,  80,  81,  82,  83,  84,
         87,  88,  92,  93,  95,  96,  97,  98,  99, 100, 102, 103, 104,
        105, 106, 107, 109, 110, 111, 112, 114, 115, 118, 119, 120, 121,
        126, 127, 130, 135, 136, 137, 139, 140, 141, 142, 144, 145, 146,
        147, 148, 149, 150, 151, 152, 154, 156, 157, 158, 160, 162, 165,
        167, 169, 170, 171, 172, 174, 176, 177, 178, 180, 181, 185, 186,
        188, 191, 194, 195, 196, 198, 200, 201, 202, 203, 204, 206, 207,
        208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
        221, 222, 224, 225, 226, 227, 228, 230, 231, 232, 233, 234, 235,
        236, 237, 238, 239, 240, 241, 242, 24

In [12]:
nunique_values_per_column = {}

for col, values in unique_values_per_column.items():
    nunique_values_per_column[col] = len(values)

nunique_values_per_column

{'id': 563,
 'name': 555,
 'powerstats_intelligence': 19,
 'powerstats_strength': 58,
 'powerstats_speed': 43,
 'powerstats_durability': 41,
 'powerstats_power': 95,
 'powerstats_combat': 38,
 'appearance_gender': 3,
 'appearance_race': 62,
 'appearance_height': 118,
 'appearance_weight': 294,
 'biography_fullName': 490,
 'biography_firstAppearance': 472,
 'biography_publisher': 69,
 'biography_alignment': 4,
 'work_occupation': 328,
 'work_base': 245,
 'connections_groupAffiliation': 422,
 'images_lg': 558}

In [13]:
df_cleaned.isnull().sum()

id                                0
name                              0
powerstats_intelligence           0
powerstats_strength               0
powerstats_speed                  0
powerstats_durability             0
powerstats_power                  0
powerstats_combat                 0
appearance_gender                 0
appearance_race                 164
appearance_height                 0
appearance_weight                 0
biography_fullName                0
biography_firstAppearance         0
biography_publisher               6
biography_alignment               0
work_occupation                   0
work_base                         0
connections_groupAffiliation      0
images_lg                         0
dtype: int64

In [14]:
df_cleaned['appearance_race'] = df_cleaned['appearance_race'].fillna('unknown')

In [15]:
df_cleaned['biography_publisher'] = df_cleaned['biography_publisher'].fillna('unknown')

In [16]:
for col in df_cleaned.columns:
    if df_cleaned[col].apply(type).eq(list).any():
        df_cleaned[col] = df_cleaned[col].apply(tuple)

# Find duplicate rows
duplicate_rows = df_cleaned.duplicated()

# Count the number of duplicate rows
num_duplicates = duplicate_rows.sum()

print("Number of duplicate rows:", num_duplicates)

Number of duplicate rows: 0


In [17]:
df_cleaned['name'].duplicated().sum()

8

In [18]:
df_cleaned['id'].duplicated().sum()

0

In [19]:
duplicates_in_column = df_cleaned[df_cleaned['name'].duplicated(keep=False)]
#I will keep duplicated names because they are different versions of the character with different ID's

In [20]:
display(df_cleaned)

Unnamed: 0,id,name,powerstats_intelligence,powerstats_strength,powerstats_speed,powerstats_durability,powerstats_power,powerstats_combat,appearance_gender,appearance_race,appearance_height,appearance_weight,biography_fullName,biography_firstAppearance,biography_publisher,biography_alignment,work_occupation,work_base,connections_groupAffiliation,images_lg
0,1,A-Bomb,38,100,17,80,24,64,Male,Human,"(6'8, 203 cm)","(980 lb, 441 kg)",Richard Milhouse Jones,"Hulk Vol 2 #2 (April, 2008) (as A-Bomb)",Marvel Comics,good,"Musician, adventurer, author; formerly talk sh...",-,"Hulk Family; Excelsior (sponsor), Avengers (ho...",https://cdn.jsdelivr.net/gh/akabab/superhero-a...
1,2,Abe Sapien,88,28,35,65,100,85,Male,Icthyo Sapien,"(6'3, 191 cm)","(145 lb, 65 kg)",Abraham Sapien,Hellboy: Seed of Destruction (1993),Dark Horse Comics,good,Paranormal Investigator,-,Bureau for Paranormal Research and Defense,https://cdn.jsdelivr.net/gh/akabab/superhero-a...
2,3,Abin Sur,50,90,53,64,99,65,Male,Ungaran,"(6'1, 185 cm)","(200 lb, 90 kg)",,"Showcase #22 (October, 1959)",DC Comics,good,"Green Lantern, former history professor",Oa,"Green Lantern Corps, Black Lantern Corps",https://cdn.jsdelivr.net/gh/akabab/superhero-a...
3,4,Abomination,63,80,53,90,62,95,Male,Human / Radiation,"(6'8, 203 cm)","(980 lb, 441 kg)",Emil Blonsky,Tales to Astonish #90,Marvel Comics,bad,Ex-Spy,Mobile,former member of the crew of the Andromeda Sta...,https://cdn.jsdelivr.net/gh/akabab/superhero-a...
4,5,Abraxas,88,63,83,100,100,55,Male,Cosmic Entity,"(-, 0 cm)","(- lb, 0 kg)",Abraxas,Fantastic Four Annual #2001,Marvel Comics,bad,Dimensional destroyer,-,Cosmic Beings,https://cdn.jsdelivr.net/gh/akabab/superhero-a...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
558,727,Yellowjacket II,50,10,35,28,31,28,Female,Human,"(5'5, 165 cm)","(115 lb, 52 kg)",Rita DeMara,Avengers #264,Marvel Comics,good,"Adventurer; former criminal, electronics engineer",New York City area,"Formerly Guardians of the Galaxy, Avengers, Ma...",https://cdn.jsdelivr.net/gh/akabab/superhero-a...
559,728,Ymir,50,100,27,100,98,28,Male,Frost Giant,"(1000, 304.8 meters)","(- lb, 0 kg)",Ymir,JOURNEY INTO MYSTERY #97,Marvel Comics,good,-,Niffleheim,-,https://cdn.jsdelivr.net/gh/akabab/superhero-a...
560,729,Yoda,88,52,33,25,100,90,Male,Yoda's species,"(2'2, 66 cm)","(38 lb, 17 kg)",Yoda,Star Wars: Episode V - The Empire Strikes Back...,George Lucas,good,-,-,"Jedi Order, Jedi High Counsl, Galactic Republic",https://cdn.jsdelivr.net/gh/akabab/superhero-a...
561,730,Zatanna,81,10,23,28,100,56,Female,Human,"(5'7, 170 cm)","(127 lb, 57 kg)",Zatanna Zatara,Hawkman #4,DC Comics,good,-,-,"Misty Kilgore, Seven Soldiers of Victory, Just...",https://cdn.jsdelivr.net/gh/akabab/superhero-a...


## Data Formatting

In [21]:
#Summary
#Format height and weight to kg and cm
#replace biography_alignment values with emojis
#Set everything to capitalize letters
#Set the ID columns as index
#Create a mapping to change all column names
#Replace all 0 cm and kg with unknown

In [22]:
df_cleaned_formatted = df_cleaned.copy()

In [23]:
def extract_value(cell):
    return cell[1]

df_cleaned_formatted['appearance_height'] = df_cleaned_formatted['appearance_height'].apply(extract_value)
df_cleaned_formatted['appearance_weight'] = df_cleaned_formatted['appearance_weight'].apply(extract_value)

In [24]:
alignment_emojis = {
    'good': 'üòá',
    'neutral': 'üòê',
    'bad': 'üòà',
    '-': 'üòê'
}

df_cleaned_formatted['biography_alignment'] = df_cleaned_formatted['biography_alignment'].replace(alignment_emojis)

In [25]:
def capitalize_words(sentence):
    return ' '.join(word.capitalize() for word in sentence.split())

df_cleaned_formatted = df_cleaned_formatted.apply(lambda x: x.map(lambda x: capitalize_words(x) if isinstance(x, str) else x))

In [26]:
df_cleaned_formatted.set_index('id', inplace=True)
df_cleaned_formatted.index.name = 'id'

In [27]:
column_name_mapping = {
    'appearance_gender': 'Gender',
    'appearance_race': 'Race',
    'appearance_height': 'Height',
    'appearance_weight': 'Weight',
    'biography_fullName': 'Full Name',
    'biography_firstAppearance': 'First Appearance',
    'biography_publisher': 'Publisher',
    'biography_alignment': 'Alignment',
    'name': 'Name',
    'powerstats_intelligence': 'PS Intelligence',
    'powerstats_strength': 'PS Strength',
    'powerstats_speed': 'PS Speed',
    'powerstats_durability': 'PS Durability',
    'powerstats_power': 'PS Power',
    'powerstats_combat': 'PS Combat',
    'work_occupation': 'Occupation',
    'work_base': 'Base',
    'connections_groupAffiliation': 'Affiliation',
    'images_lg': 'Image'
}

df_cleaned_formatted.rename(columns=column_name_mapping, inplace=True)

In [28]:
skills_columns = ['PS Intelligence', 'PS Strength', 'PS Speed', 'PS Durability', 'PS Power', 'PS Combat']

df_cleaned_formatted.insert(7, 'Overall PS', df_cleaned_formatted[skills_columns].sum(axis=1))

In [29]:
df_cleaned_formatted['Height'] = df_cleaned_formatted['Height'].replace('0 Cm', 'Unknown')
df_cleaned_formatted['Weight'] = df_cleaned_formatted['Weight'].replace('0 Kg', 'Unknown')

In [30]:
display(df_cleaned_formatted)

Unnamed: 0_level_0,Name,PS Intelligence,PS Strength,PS Speed,PS Durability,PS Power,PS Combat,Overall PS,Gender,Race,Height,Weight,Full Name,First Appearance,Publisher,Alignment,Occupation,Base,Affiliation,Image
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
1,A-bomb,38,100,17,80,24,64,323,Male,Human,203 Cm,441 Kg,Richard Milhouse Jones,"Hulk Vol 2 #2 (april, 2008) (as A-bomb)",Marvel Comics,üòá,"Musician, Adventurer, Author; Formerly Talk Sh...",-,"Hulk Family; Excelsior (sponsor), Avengers (ho...",Https://cdn.jsdelivr.net/gh/akabab/superhero-a...
2,Abe Sapien,88,28,35,65,100,85,401,Male,Icthyo Sapien,191 Cm,65 Kg,Abraham Sapien,Hellboy: Seed Of Destruction (1993),Dark Horse Comics,üòá,Paranormal Investigator,-,Bureau For Paranormal Research And Defense,Https://cdn.jsdelivr.net/gh/akabab/superhero-a...
3,Abin Sur,50,90,53,64,99,65,421,Male,Ungaran,185 Cm,90 Kg,,"Showcase #22 (october, 1959)",Dc Comics,üòá,"Green Lantern, Former History Professor",Oa,"Green Lantern Corps, Black Lantern Corps",Https://cdn.jsdelivr.net/gh/akabab/superhero-a...
4,Abomination,63,80,53,90,62,95,443,Male,Human / Radiation,203 Cm,441 Kg,Emil Blonsky,Tales To Astonish #90,Marvel Comics,üòà,Ex-spy,Mobile,Former Member Of The Crew Of The Andromeda Sta...,Https://cdn.jsdelivr.net/gh/akabab/superhero-a...
5,Abraxas,88,63,83,100,100,55,489,Male,Cosmic Entity,Unknown,Unknown,Abraxas,Fantastic Four Annual #2001,Marvel Comics,üòà,Dimensional Destroyer,-,Cosmic Beings,Https://cdn.jsdelivr.net/gh/akabab/superhero-a...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
727,Yellowjacket Ii,50,10,35,28,31,28,182,Female,Human,165 Cm,52 Kg,Rita Demara,Avengers #264,Marvel Comics,üòá,"Adventurer; Former Criminal, Electronics Engineer",New York City Area,"Formerly Guardians Of The Galaxy, Avengers, Ma...",Https://cdn.jsdelivr.net/gh/akabab/superhero-a...
728,Ymir,50,100,27,100,98,28,403,Male,Frost Giant,304.8 Meters,Unknown,Ymir,Journey Into Mystery #97,Marvel Comics,üòá,-,Niffleheim,-,Https://cdn.jsdelivr.net/gh/akabab/superhero-a...
729,Yoda,88,52,33,25,100,90,388,Male,Yoda's Species,66 Cm,17 Kg,Yoda,Star Wars: Episode V - The Empire Strikes Back...,George Lucas,üòá,-,-,"Jedi Order, Jedi High Counsl, Galactic Republic",Https://cdn.jsdelivr.net/gh/akabab/superhero-a...
730,Zatanna,81,10,23,28,100,56,298,Female,Human,170 Cm,57 Kg,Zatanna Zatara,Hawkman #4,Dc Comics,üòá,-,-,"Misty Kilgore, Seven Soldiers Of Victory, Just...",Https://cdn.jsdelivr.net/gh/akabab/superhero-a...


## Filtering Data

In [41]:
#Summary
#Filter top 10 strongest characters
#And display with images

In [39]:
from IPython.display import display, HTML

# Assuming df_cleaned_formatted contains your data
top_10_overall_ps = df_cleaned_formatted.nlargest(10, 'Overall PS')

# Define a formatting function to render URLs as images
def render_image(url):
    return f'<img src="{url}" style="max-width:75px; height:auto;">'

# Apply the formatting function to the Image column
top_10_overall_ps['Image'] = top_10_overall_ps['Image'].apply(render_image)

# Convert DataFrame to HTML
html = top_10_overall_ps.to_html(escape=False)

display(HTML(html))


Unnamed: 0_level_0,Name,PS Intelligence,PS Strength,PS Speed,PS Durability,PS Power,PS Combat,Overall PS,Gender,Race,Height,Weight,Full Name,First Appearance,Publisher,Alignment,Occupation,Base,Affiliation,Image
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
426,Man Of Miracles,100,100,100,100,100,100,600,-,God / Eternal,Unknown,Unknown,,Spawn #75,Image Comics,üòê,-,-,-,
503,One-above-all,100,100,100,100,100,100,600,-,Cosmic Entity,Unknown,Unknown,,Doctor Strange Vol 2 #13,Marvel Comics,üòê,-,-,-,
278,General Zod,94,100,96,100,100,95,585,Male,Kryptonian,Unknown,Unknown,Dru-zod,Action Comics #845,Dc Comics,üòà,-,Formerly Krypton; Formerly The Phantom Zone; Currently Earth,Phantom Zone Criminals; Former Member Of The Kryptonian Defense Council,
80,Beyonder,100,100,100,100,100,84,584,Male,God / Eternal,Unknown,Unknown,,Marvel Super-heroes Secret Wars #1,Marvel Comics,üòá,Former Student,-,Was Once In A Merged Form With The Molecule Man But Is No Longer Merged With Him.,
642,Superboy-prime,94,100,100,100,100,85,579,Male,Kryptonian,180 Cm,77 Kg,Kal-el,"Dc Comics Presents #87 (november, 1985)",Dc Comics,üòà,-,"Currently A Recreation Of Earth-prime, Formerly Qward, Anti-matter Universe.","Formerly Black Lantern Corps, Legion Of Super-villains, Sinestro Corps",
644,Superman,94,100,100,100,100,85,579,Male,Kryptonian,191 Cm,101 Kg,Clark Kent,Action Comics #1,Superman Prime One-million,üòá,Reporter For The Daily Planet And Novelist,Metropolis,"Justice League Of America, The Legion Of Super-heroes (pre-crisis As Superboy); Justice Society Of America (pre-crisis Earth-2 Version); All-star Squadron (pre-crisis Earth-2 Version)",
637,Steppenwolf,94,100,83,100,100,100,577,Male,New God,183 Cm,91 Kg,,The New Gods #7,Dc Comics,üòà,-,-,-,
432,Martian Manhunter,100,95,92,100,100,85,572,Male,Martian,201 Cm,135 Kg,J'onn J'onzz,Detective Comics #225 (1955),Dc Comics,üòá,-,-,"Darkstars, Justice League Of Aliens, Justice League Of America, Justice League Task Force, Justice League Unlimited, Martians, Outsiders",
524,Power Girl,94,100,100,100,100,75,569,Female,Kryptonian,180 Cm,81 Kg,Kara Zor-l,All-star Comics #58 (january/february 1976),Dc Comics,üòá,-,-,"Justice Society Of America, Justice League Europe, Infinity, Inc., Sovereign Seven",
643,Supergirl,94,100,100,100,100,75,569,Female,Kryptonian,165 Cm,54 Kg,Kara Zor-el,"Superman/batman #8 (may, 2004)",Dc Comics,üòá,"Intern, Adventurer, Student, Crime-fighter",Metropolis,"Formerly Justice League Of America, Supermen Of America, Justice League Of Amazons, Kent Family, Superman Family, Kryptonian Science Guild, Teen Titans, Legion Of Super-heroes, Outsiders",
