In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Use seaborn's white-grid theme for every plot in this notebook
sns.set_style("whitegrid")

# Load the Palmer Penguins dataset
penguins = sns.load_dataset("penguins")

# Display the first few rows and basic information about the dataset
print(penguins.head())
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the summary.
penguins.info()

## Task 1: Exploring Distributions with Histograms

1. Create histograms for ‘bill_length_mm’, ‘bill_depth_mm’, ‘flipper_length_mm’, and ‘body_mass_g’.

In [None]:
# 2x2 grid of 20-bin histograms, one panel per body measurement.
# Walking the transposed axes array fills the grid column by column, so the
# two bill measurements land in the left column and flipper length / body
# mass in the right column.
fig, axes = plt.subplots(2, 2, figsize=(8, 8))
ax1, ax2, ax3, ax4 = [
    sns.histplot(data=penguins, x=measurement, bins=20, ax=panel)
    for measurement, panel in zip(
        ("bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g"),
        axes.T.flat,
    )
]

2. Experiment with different numbers of bins to see how it affects the visualization.

In [None]:
# Draw the same measurement with several bin counts side by side to see how
# the choice of `bins` changes the apparent shape of the distribution.
bin_counts = (5, 20, 50)
bins_fig, bins_axes = plt.subplots(1, len(bin_counts), figsize=(12, 4))
for n_bins, panel in zip(bin_counts, bins_axes):
    sns.histplot(data=penguins, x='flipper_length_mm', bins=n_bins, ax=panel)
    panel.set_title(f'bins = {n_bins}')
# Keep the per-panel titles and axis labels from colliding
bins_fig.tight_layout()

3. Try using sns.histplot() with the ‘kde’ parameter set to True to overlay a kernel density estimate.

In [None]:
# Same 2x2 histogram grid as before, now with a kernel density estimate
# overlaid on each panel (kde=True).
fig, axes = plt.subplots(2, 2, figsize=(8, 8))

# Keyword arguments shared by all four panels
hist_options = dict(data=penguins, bins=20, kde=True)

# Left column: bill measurements; right column: flipper length and body mass
ax1 = sns.histplot(x="bill_length_mm", ax=axes[0, 0], **hist_options)
ax2 = sns.histplot(x="bill_depth_mm", ax=axes[1, 0], **hist_options)
ax3 = sns.histplot(x="flipper_length_mm", ax=axes[0, 1], **hist_options)
ax4 = sns.histplot(x="body_mass_g", ax=axes[1, 1], **hist_options)

## Task 2: Examining Correlations

1. Use sns.pairplot() to create a grid of scatter plots for all numeric variables.

In [None]:
# Grid of pairwise plots for the numeric columns of the dataset
sns.pairplot(penguins)
# Place the title slightly above the figure (y > 1); at the default height it
# is drawn over the top row of panels.
plt.suptitle('Overview of Penguin Measurements', y=1.02)
plt.show()

2. Modify the pairplot to show the species information using different colors.

In [None]:
# Same pairwise grid, with each point coloured by its species
sns.pairplot(penguins, hue = 'species')
# Place the title slightly above the figure (y > 1); at the default height it
# is drawn over the top row of panels.
plt.suptitle('Overview of Penguin Measurements', y=1.02)
plt.show()

3. Interpret the pairplot: which variables seem to be most strongly correlated? Do you notice any patterns related to species?

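Flipper length and body mass appear to be the most strongly correlated pair, showing a clear positive linear trend. By species, Adelie and Chinstrap overlap on most measurements and look more similar to each other than either does to Gentoo, which separates into its own cluster.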

## Task 3: Investigating Relationships with Regression Plots

1. Create a regression plot (sns.regplot) showing the relationship between ‘flipper_length_mm’ and ‘body_mass_g’.

In [None]:
# Scatter of flipper length against body mass with a fitted regression line
sns.regplot(
    x="flipper_length_mm",
    y="body_mass_g",
    data=penguins,
)

2. Create another regplot showing the relationship between ‘bill_length_mm’ and ‘bill_depth_mm’.

In [None]:
# Scatter of bill length against bill depth with a fitted regression line
# (all species pooled together)
sns.regplot(
    x="bill_length_mm",
    y="bill_depth_mm",
    data=penguins,
)

3. Try adding the ‘species’ information to one of these plots using different colors. Hint: You might want to use sns.lmplot for this.

In [None]:
# Bill length against bill depth, split by species: `hue` gives every species
# its own colour and its own regression fit
sns.lmplot(
    x="bill_length_mm",
    y="bill_depth_mm",
    hue="species",
    data=penguins,
)

## Task 4: Joint Distribution Plots

1. Create a joint plot for ‘flipper_length_mm’ and ‘body_mass_g’.

In [None]:
# Joint view of flipper length and body mass using jointplot's default kind,
# with each variable's own distribution drawn in the margins
sns.jointplot(
    x="flipper_length_mm",
    y="body_mass_g",
    data=penguins,
)

2. Experiment with different kind parameters in the joint plot (e.g., ‘scatter’, ‘kde’, ‘hex’).

In [None]:
# Same pair of variables, drawn as kernel density estimates instead of points
sns.jointplot(
    x="flipper_length_mm",
    y="body_mass_g",
    kind="kde",
    data=penguins,
)

3. Create another joint plot, this time for ‘bill_length_mm’ and ‘bill_depth_mm’, colored by species.

In [None]:
# Colour the observations by species, as the task asks. `hue` cannot be
# combined with kind='hex', so the default scatter kind is used instead.
sns.jointplot(data = penguins, x = 'bill_length_mm', y = 'bill_depth_mm', hue = 'species')

## BONUS CHALLENGE

In [None]:
# Pairwise correlations between the numeric columns only; the non-numeric
# columns are filtered out before calling .corr()
corr_matrix = (
    penguins
    .select_dtypes(include=np.number)
    .corr()
)

In [None]:
# Render the correlation matrix as a colour-coded grid
sns.heatmap(corr_matrix)