<a href="https://colab.research.google.com/github/john-a-dixon/applying-advanced-transformations/blob/main/applying_adv_tra.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Applying Advanced Transformations**

_John Andrew Dixon_

---

##### **Imports**

In [63]:
import json
import pandas as pd

##### **Data Load**

In [64]:
# Google Sheet published as CSV that holds the superhero powers data
powers_url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vQ2cMaGI74T_DqeCWDojRIyUCQqNZxsu_L3f42JJKV-_f873im-CBttJU8qn_Kan22qa71uCXfDWdMI/pub?output=csv"
# Read the remote CSV straight into a DataFrame
powers_df = pd.read_csv(filepath_or_buffer=powers_url)
# Preview the first rows to confirm the load succeeded
powers_df.head()

Unnamed: 0,hero_names,Powers
0,3-D Man,"Agility,Super Strength,Stamina,Super Speed"
1,A-Bomb,"Accelerated Healing,Durability,Longevity,Super..."
2,Abe Sapien,"Agility,Accelerated Healing,Cold Resistance,Du..."
3,Abin Sur,Lantern Power Ring
4,Abomination,"Accelerated Healing,Intelligence,Super Strengt..."


In [65]:
# Google Sheet published as CSV that holds the superhero info data
info_url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vRgXxC_AvDsxnrZSMIuKnPMvrSrL6ZnoLogMNU-ZD8uOB88pbsINNtkii1PlMTgWPSyrqm5JJnU6pCr/pub?output=csv"
# Read the remote CSV straight into a DataFrame
info_df = pd.read_csv(filepath_or_buffer=info_url)
# Preview the first rows to confirm the load succeeded
info_df.head()

Unnamed: 0,Hero|Publisher,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements
0,A-Bomb|Marvel Comics,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
1,Abe Sapien|Dark Horse Comics,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}"
2,Abin Sur|DC Comics,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}"
3,Abomination|Marvel Comics,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}"
4,Absorbing Man|Marvel Comics,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}"


--- 

## **Cleaning**

##### _Separate Hero From Publisher_

In [66]:
# Split the "Hero|Publisher" column on the first "|" only (n=1) and save
# the result to two new columns named "Hero" and "Publisher". Limiting the
# split guarantees at most two resulting columns: without it, any value
# containing a second "|" would produce a third column and the two-column
# assignment would raise a ValueError.
info_df[["Hero", "Publisher"]] = info_df["Hero|Publisher"].str.split("|", n=1, expand=True)
# Drop the original combined column (rebinding instead of mutating in place)
info_df = info_df.drop(columns="Hero|Publisher")
# Verify it worked
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics
2,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics
3,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics
4,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics


##### _Separate Height From Weight_

In [67]:
# Spot-check on the first row: after swapping single quotes for double
# quotes, the measurements string should parse as JSON into a dict
a = info_df.loc[0, "Measurements"]
as_json_text = a.replace("'", '"')
type(json.loads(as_json_text))

dict

In [68]:
# Turn each "Measurements" string into a dictionary in two steps:
#   1. swap every single quote for a double quote so the text is valid JSON
#   2. parse the resulting JSON text with json.loads
info_df["Measurements"] = (
    info_df["Measurements"]
    .str.replace("'", '"')
    .apply(json.loads)
)
# Verify it worked
info_df["Measurements"].head()

0    {'Height': '203.0 cm', 'Weight': '441.0 kg'}
1     {'Height': '191.0 cm', 'Weight': '65.0 kg'}
2     {'Height': '185.0 cm', 'Weight': '90.0 kg'}
3    {'Height': '203.0 cm', 'Weight': '441.0 kg'}
4    {'Height': '193.0 cm', 'Weight': '122.0 kg'}
Name: Measurements, dtype: object

In [69]:
# Create a DataFrame with just the heights and weights, one column per
# dictionary key. Building the frame once from a list of dicts avoids
# .apply(pd.Series), which constructs a separate Series for every row.
# Reusing info_df's index keeps the rows aligned with info_df.
# NOTE: assumes every "Measurements" entry is already a dict.
heights_weights = pd.DataFrame(info_df["Measurements"].tolist(), index=info_df.index)
heights_weights.head()

Unnamed: 0,Height,Weight
0,203.0 cm,441.0 kg
1,191.0 cm,65.0 kg
2,185.0 cm,90.0 kg
3,203.0 cm,441.0 kg
4,193.0 cm,122.0 kg


In [70]:
# Put the unit into each measurement column's name
unit_labels = {"Height": "Height (cm)", "Weight": "Weight (kg)"}
heights_weights = heights_weights.rename(columns=unit_labels)
heights_weights.head()

Unnamed: 0,Height (cm),Weight (kg)
0,203.0 cm,441.0 kg
1,191.0 cm,65.0 kg
2,185.0 cm,90.0 kg
3,203.0 cm,441.0 kg
4,193.0 cm,122.0 kg


In [71]:
# For each measurement column, keep only the text before the first space
# (the number, without its unit) and convert it to a float
for measurement_col in ["Height (cm)", "Weight (kg)"]:
    number_text = heights_weights[measurement_col].str.split(" ").str[0]
    heights_weights[measurement_col] = number_text.astype(float)
# Confirm both columns are now floats
heights_weights.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 463 entries, 0 to 462
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Height (cm)  463 non-null    float64
 1   Weight (kg)  463 non-null    float64
dtypes: float64(2)
memory usage: 7.4 KB


In [72]:
# Attach the height and weight columns to the right of the hero info,
# matching rows by index
frames_to_join = [info_df, heights_weights]
info_df = pd.concat(frames_to_join, axis="columns")
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Measurements,Hero,Publisher,Height (cm),Weight (kg)
0,Male,Human,good,No Hair,yellow,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",A-Bomb,Marvel Comics,203.0,441.0
1,Male,Icthyo Sapien,good,No Hair,blue,blue,"{'Height': '191.0 cm', 'Weight': '65.0 kg'}",Abe Sapien,Dark Horse Comics,191.0,65.0
2,Male,Ungaran,good,No Hair,blue,red,"{'Height': '185.0 cm', 'Weight': '90.0 kg'}",Abin Sur,DC Comics,185.0,90.0
3,Male,Human / Radiation,bad,No Hair,green,Unknown,"{'Height': '203.0 cm', 'Weight': '441.0 kg'}",Abomination,Marvel Comics,203.0,441.0
4,Male,Human,bad,No Hair,blue,Unknown,"{'Height': '193.0 cm', 'Weight': '122.0 kg'}",Absorbing Man,Marvel Comics,193.0,122.0


In [73]:
# Remove the original "Measurements" column from the frame
info_df = info_df.drop(columns=["Measurements"])
info_df.head()

Unnamed: 0,Gender,Race,Alignment,Hair color,Eye color,Skin color,Hero,Publisher,Height (cm),Weight (kg)
0,Male,Human,good,No Hair,yellow,Unknown,A-Bomb,Marvel Comics,203.0,441.0
1,Male,Icthyo Sapien,good,No Hair,blue,blue,Abe Sapien,Dark Horse Comics,191.0,65.0
2,Male,Ungaran,good,No Hair,blue,red,Abin Sur,DC Comics,185.0,90.0
3,Male,Human / Radiation,bad,No Hair,green,Unknown,Abomination,Marvel Comics,203.0,441.0
4,Male,Human,bad,No Hair,blue,Unknown,Absorbing Man,Marvel Comics,193.0,122.0
