In [None]:
# Importing required libraries
import pandas as pd

# Reading the hero-info and hero-powers CSV files into DataFrames
# (info first, then powers — the tuple unpacking follows that order)
info_df, powers_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/mnt/data/superhero_info - superhero_info.csv",
        "/mnt/data/superhero_powers - superhero_powers.csv",
    )
)

# Previewing the leading rows of both DataFrames for inspection
info_df.head(), powers_df.head()


In [None]:
# Splitting the combined "Hero|Publisher" column into separate "Hero" and
# "Publisher" columns
info_df[['Hero', 'Publisher']] = info_df['Hero|Publisher'].str.split('|', expand=True)

# Extracting height and weight from the "Measurements" column.
# - The patterns are raw strings so that `\d` reaches the regex engine instead
#   of being an invalid string escape.
# - The decimal point is escaped so it only matches a literal ".", and the
#   fractional part is optional so whole numbers (e.g. "71 kg") are captured.
# - expand=False makes str.extract return a Series for the single capture
#   group; rows that do not match the pattern yield NaN.
# pd.to_numeric(errors='coerce') then converts the captured text to numbers,
# leaving anything unparseable as NaN.
info_df['Height'] = pd.to_numeric(
    info_df['Measurements'].str.extract(r"'Height': '(\d+(?:\.\d+)?) cm'", expand=False),
    errors='coerce',
)
info_df['Weight'] = pd.to_numeric(
    info_df['Measurements'].str.extract(r"'Weight': '(\d+(?:\.\d+)?) kg'", expand=False),
    errors='coerce',
)

# Dropping the original source columns now that their contents have been parsed
info_df = info_df.drop(columns=['Hero|Publisher', 'Measurements'])

info_df.head()


In [None]:
# Collecting every distinct power name from the ", "-separated 'Powers' column,
# in order of first appearance (stack() discards the empty padding cells that
# expand=True creates for heroes with fewer powers).
powers_list = powers_df['Powers'].str.split(', ', expand=True).stack().unique()

# One-hot-encoding the powers.
# str.get_dummies compares whole ", "-separated entries, so a power whose name
# is contained in another power's name is not flagged by mistake, and power
# names are never interpreted as regular expressions (both were possible with
# str.contains). Rows with a missing 'Powers' value become all zeros instead of
# raising on the integer conversion.
# reindex keeps the indicator columns in first-appearance order.
power_flags = (
    powers_df['Powers']
    .str.get_dummies(sep=', ')
    .reindex(columns=powers_list, fill_value=0)
)

# Replacing the original 'Powers' column with the indicator columns in a single
# concat rather than inserting the columns one at a time
powers_df = pd.concat([powers_df.drop(columns='Powers'), power_flags], axis=1)

powers_df.head()


In [None]:
# Inner-joining info_df with powers_df: info_df's "Hero" is matched against
# powers_df's "hero_names", then the redundant right-hand key column
# ('hero_names') is removed from the result.
combined_df = info_df.merge(
    powers_df,
    how='inner',
    left_on='Hero',
    right_on='hero_names',
).drop(columns='hero_names')

combined_df.head()


In [None]:
# Mean 'Weight' of the rows flagged with Super Speed (== 1) versus the rows
# flagged without it (== 0)
average_weight_with_speed = combined_df.loc[
    combined_df['Super Speed'].eq(1), 'Weight'
].mean()
average_weight_without_speed = combined_df.loc[
    combined_df['Super Speed'].eq(0), 'Weight'
].mean()

average_weight_with_speed, average_weight_without_speed


In [None]:
# Mean 'Height' within each 'Publisher' group
average_height_per_publisher = (
    combined_df
    .groupby('Publisher')['Height']
    .agg('mean')
)

average_height_per_publisher
