In [1]:
import pandas as pd
import altair as alt

# Please note: for this Altair replication, I am choosing to employ a technical achievement:
# I am filling in every NA value in the loaded dataset with the current average value (for quantities) or most frequent value (for categorical data) in that column.
# I am employing this technical improvement because NA/NAN values are unhashable in Python, which creates errors when trying to scatter all of the data.

# I referenced these articles to help me construct the altair replication: 
# https://nextjournal.com/sdanisch/scales-axes-and-legends
# https://python.plainenglish.io/library-of-the-week-15-altair-073598250420

# Columns read from the CSV; the first entry is the categorical species column,
# the remaining entries are the numeric measurements.
column_list = [
    "species",
    "bill_length_mm",
    "flipper_length_mm",
    "body_mass_g",
]

# Each species paired with the colour used for it in the chart. The domain and
# range lists handed to the colour scale are derived from this one mapping so
# the two always stay in the same order.
_species_palette = {
    "Adelie": "#FF9013",
    "Chinstrap": "#9932CC",
    "Gentoo": "#048B8C",
}
species_domain = list(_species_palette.keys())
species_color_range = list(_species_palette.values())

# Load only the columns the chart needs. NA detection is deliberately left at
# the pandas default (na_filter=True) so that missing-value markers such as
# "NA" or empty cells are parsed as NaN and can be imputed below; with
# na_filter=False they would stay as ordinary strings and never be filled.
dataframe = pd.read_csv(
    "../penglings.csv",
    usecols=column_list,  # Read only the selected columns from the csv file.
)

# Impute missing values column by column: the most frequent value for the
# species column (column_list[0]), the column mean for every numeric column.
for feature_name in dataframe.columns:
    if feature_name == column_list[0]:
        # Series.mode() returns a Series (there may be ties), so take its first
        # entry as a scalar; passing the Series itself to fillna would fill by
        # index alignment instead of with a single value.
        most_frequent = dataframe[feature_name].mode()
        if not most_frequent.empty:
            dataframe[feature_name] = dataframe[feature_name].fillna(most_frequent.iloc[0])
    else:
        # Coerce anything non-numeric to NaN so it is imputed with the mean too.
        numeric_values = pd.to_numeric(dataframe[feature_name], errors='coerce')
        # Assign the result back instead of calling fillna(inplace=True) on a
        # column selection, which is a chained assignment and does not reliably
        # update the parent DataFrame.
        dataframe[feature_name] = numeric_values.fillna(numeric_values.mean())

# Scatter plot of body mass against flipper length, coloured by species and
# sized by bill length. Each encoding channel is built separately and then
# combined; the final bare expression is what the notebook cell displays.

# X channel: flipper length on a fixed [169, 235] domain.
# NOTE(review): the sort fields 'x' / 'y' used on the two axes are not column
# names of the data - confirm they have the intended effect.
flipper_channel = alt.X(
    "flipper_length_mm:Q",
    title="Flipper Length (mm)",
    sort=alt.EncodingSortField('x', order='ascending'),
    scale=alt.Scale(domain=[169, 235]),
    axis=alt.Axis(tickCount=7),
)

# Y channel: body mass on a fixed [2500, 6500] domain.
mass_channel = alt.Y(
    "body_mass_g:Q",
    title="Body Mass (g)",
    sort=alt.EncodingSortField('y', order='descending'),
    scale=alt.Scale(domain=[2500, 6500]),
    axis=alt.Axis(tickCount=4),
)

# Colour channel: one fixed colour per species, legend placed on the right.
species_legend = alt.Legend(
    title="species",
    orient='right',
    titleFontSize=12,
    labelFontSize=12,
)
species_channel = alt.Color(
    "species",
    scale=alt.Scale(domain=species_domain, range=species_color_range),
    legend=species_legend,
)

# Size channel: marker size follows bill length.
bill_channel = alt.Size("bill_length_mm:Q", title="bill_length_mm")

penguin_scatter = alt.Chart(
    dataframe,
    title="Scatter Plot of Body Mass (g) vs. Flipper Length (mm) of Penguins",
).mark_circle(opacity=0.8).encode(
    flipper_channel,
    mass_channel,
    species_channel,
    bill_channel,
    tooltip=["species", "flipper_length_mm:Q", "body_mass_g:Q", "bill_length_mm:Q"],
)

# Same call order as before: interactivity, then size, then axis configuration.
penguin_scatter.interactive().properties(
    height=450, width=600
).configure_axis(
    titleFontSize=15,
    labelFontSize=15,
)