In [None]:
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Columns that must be present (non-NaN) for a row to be usable below
columns_to_check = ['flipper_length_mm', 'body_mass_g', 'bill_length_mm']

# Read the penguins dataset and discard rows missing any of those columns
data = pd.read_csv("penglings.csv").dropna(subset=columns_to_check)

In [None]:
# Min-max normalize the bill length so every value falls in [0, 1]
min_bill_length = data["bill_length_mm"].min()
max_bill_length = data["bill_length_mm"].max()
data["normalized_bill_length"] = (data["bill_length_mm"] - min_bill_length) / (max_bill_length - min_bill_length)
data["scaled_sizes"] = data["normalized_bill_length"]

# Marker sizes must be numeric, so replace any NaN with a default of 0.
# (NaN can appear here when a bill length is missing, or when all bill
# lengths are equal and the normalization divides 0 by 0.)
# The result is assigned back to the column rather than calling
# fillna(..., inplace=True) on data["sizes"]: that chained in-place form
# operates on an intermediate object, triggers a FutureWarning in recent
# pandas, and does not update `data` once Copy-on-Write is enabled.
data["sizes"] = data["scaled_sizes"].fillna(0)

# Fixed colour for each penguin species
colors = {'Adelie': 'orange', 'Gentoo': 'green', 'Chinstrap': 'purple'}

# Per-point colour (looked up by species) and marker size (scaled by 100)
point_colors = data['species'].map(colors)
point_sizes = data['sizes'] * 100

# Draw on the current axes: flipper length on x, body mass on y
ax = plt.gca()
ax.scatter(data['flipper_length_mm'], data['body_mass_g'], c=point_colors, s=point_sizes)

# Title and axis labels
ax.set_title('Scatter Plot')
ax.set_xlabel('Flipper Length (mm)')
ax.set_ylabel('Body Mass (g)')

# One proxy marker per species present in the data, used as legend entries
legend_labels = {
    name: plt.Line2D(
        [0], [0],
        marker='o',
        color='w',
        label=name,
        markerfacecolor=colors[name],
        markersize=10,
    )
    for name in data['species'].unique()
}
ax.legend(handles=legend_labels.values(), title='Species')

# Render the figure
plt.show()