# In [None]:
library(ggplot2)
library(dplyr)

# Load the penguins dataset.
# NOTE(review): "penglings.csv" looks like it may be a typo for
# "penguins.csv" -- confirm the file name on disk before changing it.
data <- read.csv("penglings.csv")

# Drop rows with missing values in the measurement columns used by the plot.
# Only rows are filtered here: all other columns are kept, in particular
# `species`, which the plot maps to colour. Subsetting the data frame to just
# these three columns before na.omit() would discard `species` and make the
# ggplot call fail with "object 'species' not found".
measurement_cols <- c("flipper_length_mm", "body_mass_g", "bill_length_mm")
data <- data[complete.cases(data[measurement_cols]), , drop = FALSE]

# Min-max normalize the bill length to the range [0, 1]
min_bill_length <- min(data$bill_length_mm)
max_bill_length <- max(data$bill_length_mm)
data$normalized_bill_length <-
  (data$bill_length_mm - min_bill_length) / (max_bill_length - min_bill_length)
data$scaled_sizes <- data$normalized_bill_length

# Replace NA values in 'scaled_sizes' with a default value (0).
# After the row filter above, the only way to get here is when every bill
# length is identical: the normalization is then 0 / 0 = NaN, and is.na() is
# TRUE for NaN as well as NA.
data$scaled_sizes[is.na(data$scaled_sizes)] <- 0

# Create the plot: flipper length vs. body mass, coloured by species, with
# point size driven by the normalized bill length in `scaled_sizes`.
plot <- ggplot(
  data,
  aes(
    x = flipper_length_mm,
    y = body_mass_g,
    color = species,
    size = scaled_sizes
  )
) +
  geom_point(alpha = 0.8) +
  scale_color_manual(
    values = c("Adelie" = "orange", "Gentoo" = "green", "Chinstrap" = "purple")
  ) +
  labs(x = "Flipper Length (mm)", y = "Body Mass (g)") +
  theme_minimal() +
  theme(legend.position = "right")

# Add annotations for small ball and large ball.
# Both labels are anchored at the top-right corner of the panel (x = Inf,
# y = Inf, right-justified). They need different `vjust` values: with the same
# vjust they are drawn on top of each other and cannot be read. vjust = 1
# aligns the top of the first label with the panel edge; vjust = 2.5 pushes
# the second label down by a further 1.5 text heights.
# NOTE(review): the label texts are fixed strings, not derived from the data.
plot <- plot +
  annotate(
    "text",
    x = Inf, y = Inf,
    label = "Small Ball (bill length mm: 40)",
    hjust = 1, vjust = 1,
    color = "black"
  ) +
  annotate(
    "text",
    x = Inf, y = Inf,
    label = "Large Ball (bill length mm: 50)",
    hjust = 1, vjust = 2.5,
    color = "black"
  )

# Show the plot
print(plot)
