In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import pandas as pd

In [None]:
# Load the penguins sample dataset through seaborn and keep the untouched copy
# around so the cleaning step below can be re-run without re-downloading.
penguins_raw = sns.load_dataset("penguins")

# Columns that every plot below treats as real physical measurements.
measurement_columns = [
    'bill_length_mm',
    'bill_depth_mm',
    'flipper_length_mm',
    'body_mass_g',
]

# The previous `fillna(0, inplace=True)` wrote 0 into every missing cell. That
# fabricated zero-valued measurements (which then fed the scatter, heatmap and
# pair plot), put a numeric 0 in the `sex` column, and turned the later
# `.dropna()` calls into no-ops. Instead:
#   * rows lacking any measurement are removed (so row counts in the count/pie
#     plots now exclude those unmeasured penguins),
#   * a missing `sex` is labelled explicitly rather than encoded as 0.
penguins = (
    penguins_raw
    .dropna(subset=measurement_columns)
    .fillna({'sex': 'Unknown'})
    .drop_duplicates()
)
penguins.head()

In [None]:
'''
species --> The penguin species: Adelie, Chinstrap, or Gentoo
island --> The island in Antarctica where the penguin was observed: Torgersen, Biscoe, or Dream
bill_length_mm --> Length of the penguin's bill in millimeters (mm)
bill_depth_mm --> Depth (height) of the bill in mm
flipper_length_mm --> Length of the flipper in mm
body_mass_g --> Weight of the penguin in grams
sex --> Male or Female
'''

In [None]:
# Count plot: how many penguins of each species were recorded on each island.
plt.figure(figsize=(8, 6))

# seaborn returns the Axes it drew on; labels are set on it directly.
count_ax = sns.countplot(x='island', hue='species', data=penguins, palette='Set2')
count_ax.set_title("Penguin Species Count Across Islands", fontsize=16)
count_ax.set_xlabel("Island")
count_ax.set_ylabel("Number of Penguins")
count_ax.legend(title='Species')

plt.show()

In [None]:
# Interactive scatter plot: bill length vs. bill depth, coloured by species,
# with marker size taken from flipper length and marker symbol from sex.

# Human-readable axis/legend names for the raw column names.
axis_labels = {
    'bill_length_mm': 'Bill Length (mm)',
    'bill_depth_mm': 'Bill Depth (mm)',
    'flipper_length_mm': 'Flipper Length (mm)',
}

# Extra columns shown in the tooltip when hovering over a point.
hover_columns = ['body_mass_g', 'island', 'sex']

fig = px.scatter(
    data_frame=penguins,
    x='bill_length_mm',
    y='bill_depth_mm',
    color='species',
    symbol='sex',
    size='flipper_length_mm',
    hover_data=hover_columns,
    labels=axis_labels,
    title="Penguins: Bill Dimensions vs Flipper Length",
    template="plotly_white",
)

# Enlarge the title and legend text relative to the template defaults.
fig.update_layout(title_font_size=20, legend_title_font_size=14, legend_font_size=12)

fig.show()

In [None]:
# Pie chart: share of the recorded penguins found on each island.

# Number of rows per island, largest first (value_counts sorts descending).
island_counts = penguins['island'].value_counts()

plt.figure(figsize=(7, 7))
plt.pie(
    island_counts,
    labels=island_counts.index,
    autopct='%1.1f%%',  # print each wedge's percentage with one decimal
    colors=['#7fbfff', '#b3d9ff', '#d9ecff'],  # soft blue shades
    # `explode` must have one entry per wedge; size it from the data instead of
    # hard-coding three values so the cell still runs if the set of islands
    # present in `penguins` changes (e.g. after filtering).
    explode=[0.02] * len(island_counts),  # slight pop-out effect
    shadow=True
)

plt.title("Penguin Population Distribution Across Islands", fontsize=16)
plt.show()


In [None]:
# Violin plot: body-mass distribution for male vs. female penguins.

# Keep only rows whose sex is recorded as Male or Female and that have a body mass.
penguins_clean = penguins[penguins['sex'].isin(['Male','Female'])].dropna(subset=['body_mass_g'])

# Explicit sex -> colour mapping. A plain list of colours is assigned to the
# categories in whatever order they happen to appear in the data, so the
# intended "blue = Male, pink = Female" pairing was not guaranteed. Keying the
# palette by category and pinning the x-axis order makes it deterministic.
sex_colors = {
    'Male': '#a6cee3',    # pale blue
    'Female': '#fbb4ae',  # pale pink
}

plt.figure(figsize=(6,5))
sns.violinplot(
    data=penguins_clean,
    x='sex',
    y='body_mass_g',
    order=list(sex_colors),
    palette=sex_colors
)
plt.title("Body Mass Distribution by Sex (All Species Combined)", fontsize=16)
plt.xlabel("Sex")
plt.ylabel("Body Mass (g)")
plt.show()



In [None]:
# Bar plot: mean body mass per species, computed from `penguins_clean`
# (the Male/Female subset built for the violin plot).
avg_body_mass_species = penguins_clean.groupby('species', as_index=False)['body_mass_g'].mean()

plt.figure(figsize=(7, 5))
mass_ax = sns.barplot(
    x='species',
    y='body_mass_g',
    data=avg_body_mass_species,
    palette=['#a6cee3','#b2df8a','#fdbf6f']  # one pastel colour per species
)
mass_ax.set_title("Average Body Mass by Species (All Sexes Combined)", fontsize=16)
mass_ax.set_xlabel("Species")
mass_ax.set_ylabel("Average Body Mass (g)")

# Annotate each bar with its mean, placed 20 g above the bar top. The frame's
# integer row label is used as the bar's x position.
for bar_position, mean_mass in avg_body_mass_species['body_mass_g'].items():
    mass_ax.text(bar_position, mean_mass + 20, round(mean_mass, 1), ha='center', fontsize=12)

plt.show()

In [None]:
# Heatmap of pairwise correlations between the four measurement columns.

# Restrict to the measurement columns and discard rows with a missing value.
measurement_cols = ['bill_length_mm','bill_depth_mm','flipper_length_mm','body_mass_g']
penguins_numeric = penguins[measurement_cols].dropna()

# Pairwise correlation matrix (pandas default method).
corr = penguins_numeric.corr()

plt.figure(figsize=(6, 5))
heat_ax = sns.heatmap(
    corr,
    vmin=-1, vmax=1,  # fix the colour scale to the full correlation range
    cmap='Blues',     # blue gradient
    annot=True,       # write the coefficient inside each cell...
    fmt=".2f"         # ...rounded to two decimals
)
heat_ax.set_title("Correlation between Penguin Features (Blue Palette)", fontsize=16)
plt.show()


In [None]:
# Pair plot: every measurement plotted against every other, coloured by species.

# The four measurement columns plus `species` (needed for the hue), with
# incomplete rows removed. Note this rebinds `penguins_numeric`.
pair_columns = ['bill_length_mm','bill_depth_mm','flipper_length_mm','body_mass_g','species']
penguins_numeric = penguins[pair_columns].dropna()

sns.pairplot(
    data=penguins_numeric,
    hue="species",
    diag_kind="kde",          # density curves on the diagonal
    palette="Blues",
    plot_kws={"alpha": 0.7}   # slight transparency so overlapping points stay visible
)

# y > 1 lifts the figure-level title above the top row of panels.
plt.suptitle("Pair Plot of Penguin Measurements", y=1.02, fontsize=16)
plt.show()
