# **Seaborn Visualization Exercises**

# Use the seaborn dataset "titanic"

# EX 1 - Distribution Plots

Create a histogram of a given dataset using `histplot`.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Fetch the Titanic passenger table that ships with seaborn
titanic = sns.load_dataset("titanic")

# Age distribution in 30 bins, with a KDE curve drawn over the bars
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(data=titanic, x="age", bins=30, kde=True, ax=ax)
ax.set_title("Histogram of Titanic Passengers' Age")
ax.set_xlabel("Age")
ax.set_ylabel("Frequency")
plt.show()

Generate a KDE plot of the same dataset using `kdeplot`.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Kernel density estimate of passenger ages, shaded under the curve
fig, ax = plt.subplots(figsize=(8, 6))
sns.kdeplot(data=titanic, x="age", fill=True, ax=ax)
ax.set(
    title="KDE Plot of Titanic Passengers' Age",
    xlabel="Age",
    ylabel="Density",
)
plt.show()

Use `jointplot` to display the relationship between two numerical variables.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Age on the x axis, fare on the y axis, drawn as a scatter joint plot
joint_spec = {"x": "age", "y": "fare", "kind": "scatter"}
sns.jointplot(data=titanic, **joint_spec)

plt.show()

Create a `pairplot` to visualize pairwise relationships in a dataset.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Numeric columns to compare against each other
num_cols = ["age", "fare", "sibsp", "parch"]

# Keep only rows where every selected column has a value
complete_rows = titanic.loc[:, num_cols].dropna()

# Grid of pairwise plots over the selected columns
sns.pairplot(data=complete_rows)

plt.show()


# EX 2 - Categorical Data Plots
Create a `barplot` to show the average values of a categorical variable.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Bar height is the mean age within each passenger class.
# errorbar=None turns the error bars off; it replaces the `ci=None`
# spelling, which seaborn deprecated in v0.12.
plt.figure(figsize=(8, 6))
sns.barplot(data=titanic, x="class", y="age", errorbar=None)
plt.title("Average Age by Passenger Class")
plt.xlabel("Passenger Class")
plt.ylabel("Average Age")
plt.show()


Generate a `countplot` to show the frequency of different categories.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# One bar per passenger class; bar height is the number of rows in that class
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=titanic, x="class", ax=ax)
ax.set(
    title="Passenger Count by Class",
    xlabel="Passenger Class",
    ylabel="Count",
)
plt.show()


Use `boxplot` to visualize the distribution of numerical data across categories.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# One box per passenger class summarising the ages in that class
plt.figure(figsize=(8, 6))
ax = sns.boxplot(data=titanic, x="class", y="age")
ax.set_title("Age Distribution by Passenger Class")
ax.set_xlabel("Passenger Class")
ax.set_ylabel("Age")
plt.show()


Create a `violinplot` to combine KDE and boxplot visualizations.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# One violin per passenger class showing the shape of the age distribution
fig, ax = plt.subplots(figsize=(8, 6))
sns.violinplot(data=titanic, x="class", y="age", ax=ax)
ax.set_title("Age Distribution by Passenger Class (Violinplot)")
ax.set_xlabel("Passenger Class")
ax.set_ylabel("Age")
plt.show()


Display individual data points in a category using `stripplot`.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Every passenger is one dot; jitter and transparency reduce overplotting
strip_style = {"jitter": True, "alpha": 0.6}
fig, ax = plt.subplots(figsize=(8, 6))
sns.stripplot(data=titanic, x="class", y="age", ax=ax, **strip_style)
ax.set(
    title="Passenger Age by Class (Stripplot)",
    xlabel="Passenger Class",
    ylabel="Age",
)
plt.show()


Use `swarmplot` to create a more readable version of `stripplot`.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Every passenger is one dot, drawn with small, semi-transparent markers
plt.figure(figsize=(8, 6))
ax = sns.swarmplot(data=titanic, x="class", y="age", size=4, alpha=0.7)
ax.set_title("Passenger Age by Class (Swarmplot)")
ax.set_xlabel("Passenger Class")
ax.set_ylabel("Age")
plt.show()


# EX 3 - Matrix Plots
Create a heatmap to display correlations in a dataset using `heatmap`.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Correlation only makes sense for numbers: keep the float64/int64 columns
numeric_data = titanic.select_dtypes(include=["float64", "int64"])

# Pairwise correlation between every pair of numeric columns
corr = numeric_data.corr()

# Annotated heatmap: each cell shows its coefficient to two decimals
heatmap_style = {
    "annot": True,
    "cmap": "coolwarm",
    "fmt": ".2f",
    "linewidths": 0.5,
}
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(corr, ax=ax, **heatmap_style)
ax.set_title("Correlation Heatmap - Titanic Dataset")
plt.show()


Use `clustermap` to cluster data based on similarity.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Restrict to the float64/int64 columns, then correlate them pairwise
numeric_data = titanic.select_dtypes(include=["float64", "int64"])
corr = numeric_data.corr()

# Clustered, annotated heatmap of the correlation matrix
cluster_grid = sns.clustermap(
    corr,
    annot=True,
    cmap="coolwarm",
    fmt=".2f",
    linewidths=0.5,
)
# Title goes on the grid's own figure, slightly above the top edge
cluster_grid.fig.suptitle("Clustered Correlation Heatmap - Titanic Dataset", y=1.02)
plt.show()


# EX 4 - Grids
Implement a `PairGrid` to customize pairwise plots.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

titanic = sns.load_dataset("titanic")

# Choose a handful of numeric variables (pclass is numeric)
vars_of_interest = ["age", "fare", "pclass", "sibsp", "parch"]
# Carry the hue column along and drop rows with a missing value in any selected column
df = titanic[vars_of_interest + ["survived"]].dropna()

g = sns.PairGrid(
    df,
    vars=vars_of_interest,
    hue="survived",
    diag_sharey=False,
    corner=False,              # show both triangles; set True to show lower only
    palette={0: "#d62728", 1: "#2ca02c"}  # red for not-survived, green for survived
)

# Lower triangle: scatter
g.map_lower(
    sns.scatterplot,
    s=25, alpha=0.6, linewidth=0
)

# Diagonal: histograms (per hue)
g.map_diag(
    sns.histplot,
    bins=24, stat="density", alpha=0.8, common_norm=False
)

# Upper triangle: filled KDE contours
g.map_upper(
    sns.kdeplot,
    fill=True, thresh=0.05, levels=6, alpha=0.6
)


def corr_label(x, y, **kws):
    """Write the Pearson correlation of ``x`` and ``y`` onto the current axes.

    PairGrid calls a non-seaborn function once per hue level on the same
    axes. Each call therefore places its label one row below the labels
    already on the axes, and draws it in that hue level's colour, so the
    labels do not overlap.
    """
    r = np.corrcoef(x, y)[0, 1]
    ax = plt.gca()
    # Number of labels already on these axes decides the vertical slot
    row = len(ax.texts)
    ax.annotate(
        f"r = {r:.2f}",
        xy=(0.08, 0.86 - 0.12 * row),
        xycoords=ax.transAxes,
        fontsize=11,
        weight="bold",
        color=kws.get("color", "black"),
    )


# Second pass over the upper triangle: overlay the correlation labels
g.map_upper(corr_label)

g.add_legend(title="Survived")
# Light dashed grid on every panel
for ax in g.axes.flat:
    if ax is not None:
        ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.3)

g.fig.suptitle("Titanic — Pairwise Relationships (Hue = Survived)", fontsize=16, y=1.02)
plt.show()


Use `FacetGrid` to create multiple subplots based on a categorical variable.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Grid layout: one column per sex, one row per survival outcome (0/1)
facet_layout = {"col": "sex", "row": "survived", "margin_titles": True}
g = sns.FacetGrid(data=titanic, **facet_layout)

# Same age histogram in every facet
g.map(sns.histplot, "age", bins=20, kde=False, color="steelblue")

# Axis labels and per-facet titles
g.set_axis_labels("Age", "Count")
g.set_titles(col_template="Sex: {col_name}", row_template="Survived: {row_name}")

# Leave headroom at the top of the figure for the overall title
g.fig.subplots_adjust(top=0.85)
g.fig.suptitle("Titanic Age Distribution by Sex and Survival", fontsize=16)

plt.show()


Implement a `JointGrid` to create a customized joint distribution plot.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

titanic = sns.load_dataset("titanic")
# Only the three columns used below, without rows missing any of them
df = titanic[["age", "fare", "survived"]].dropna()

palette = {0: "#d62728", 1: "#2ca02c"}  # red = not survived, green = survived
labels  = {0: "Not Survived", 1: "Survived"}

g = sns.JointGrid(
    data=df,
    x="age", y="fare",
    height=7,              # figure size in inches
    space=0,               # no gap between joint/marginals
    ratio=5,               # joint-axes height / marginal-axes height (50 left the marginals too thin to read)
    marginal_ticks=True
)

# One pass per survival group: joint scatter plus both marginal histograms
for k, sub in df.groupby("survived"):
    g.ax_joint.scatter(
        sub["age"], sub["fare"],
        s=22, alpha=0.55, linewidth=0, label=labels[k], color=palette[k]
    )
    # Top (x) histogram
    sns.histplot(
        sub, x="age",
        bins=24, stat="density", element="step", fill=True, alpha=0.25,
        ax=g.ax_marg_x, color=palette[k]
    )
    # Right (y) histogram (horizontal)
    sns.histplot(
        sub, y="fare",
        bins=24, stat="density", element="step", fill=True, alpha=0.25,
        ax=g.ax_marg_y, color=palette[k]
    )

# Optional: add a LOWESS trend line for the overall relationship.
# NOTE: lowess=True relies on the statsmodels package being installed.
sns.regplot(
    data=df, x="age", y="fare",
    lowess=True, scatter=False, ax=g.ax_joint,
    line_kws=dict(linewidth=2, alpha=0.9, color="black")
)

# Overall Pearson correlation, shown in a boxed label at the top-left of the joint axes
r = np.corrcoef(df["age"], df["fare"])[0, 1]
g.ax_joint.annotate(
    f"Pearson r = {r:.2f}",
    xy=(0.03, 0.95), xycoords="axes fraction",
    ha="left", va="top", fontsize=11, weight="bold",
    bbox=dict(boxstyle="round,pad=0.25", fc="white", ec="0.7", alpha=0.9)
)

g.ax_joint.set_xlabel("Age")
g.ax_joint.set_ylabel("Fare")
g.ax_joint.grid(True, linestyle="--", linewidth=0.5, alpha=0.35)
g.ax_joint.legend(title="Status", frameon=True)
g.fig.suptitle("Titanic — Joint Distribution of Age vs Fare", fontsize=16, y=1.02)

plt.show()


# EX 5 - Style and Customization
Change the Seaborn style using `set_style`.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Switch the global seaborn style before drawing anything
sns.set_style("darkgrid")

# Passenger counts per class, split into one bar per sex
ax = sns.countplot(data=titanic, x="class", hue="sex")
ax.set_title("Passenger Class Distribution by Gender")

plt.show()


Remove spines from a plot using `despine`.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

sns.set_style("whitegrid")

# Age by passenger class, one box per class
ax = sns.boxplot(data=titanic, x="class", y="age")

# Strip the top and right borders (despine's defaults) from this plot
sns.despine(ax=ax)

ax.set_title("Age Distribution by Passenger Class")
plt.show()


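Scale plot elements (font sizes, line widths) for the target medium using `set_context`. Note that `set_context` does not control the figure size or aspect ratio; those are set through matplotlib.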

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Scale fonts and line widths for the target medium
# (options: paper, notebook, talk, poster)
sns.set_context("talk", font_scale=1.2)

# set_context() only applies rc keys that belong to the context definition,
# so a "figure.figsize" entry passed to it is silently dropped. The figure
# size is therefore set through matplotlib instead.
plt.figure(figsize=(10, 6))

# Create a bar plot
sns.barplot(data=titanic, x="class", y="fare", hue="sex")

plt.title("Average Fare by Class and Gender")
plt.show()


Customize color palettes using `set_palette`.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

titanic = sns.load_dataset("titanic")

# Make "Set2" the default colour cycle
# (other options: "deep", "muted", "pastel", "bright", "dark", "colorblind")
sns.set_palette("Set2")

# Passenger counts per class, split by sex
ax = sns.countplot(data=titanic, x="class", hue="sex")
ax.set_title("Passenger Class Distribution by Gender")

plt.show()


# Happy Coding!