# Seaborn Tutorial

Basic Seaborn plots for beginner ML (EDA). Assumes `df` is already a loaded pandas DataFrame.

If you don't have `df` yet, uncomment the sample dataset cell below (Iris).

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# (Optional) Switch on seaborn's default theme so the plots below share a
# nicer default style. Safe to remove if you prefer matplotlib's defaults.
sns.set_theme()

## (Optional) Sample dataset
Uncomment if you want a quick `df` for practice.

In [None]:
# df = sns.load_dataset('iris')
# df.head()

---

# 1. Line Plot
Common in ML for trends like loss/accuracy over steps, or any time series.

In [None]:
# Template for your own data: set the column names, then uncomment the call.
# x_col = "epoch"
# y_col = "loss"
# sns.lineplot(data=df, x=x_col, y=y_col)

# Self-contained demo (no need to edit): a value that falls over five steps.
_demo_df = dict(
    step=list(range(1, 6)),
    value=[0.90, 0.70, 0.55, 0.45, 0.40],
)
ax = sns.lineplot(data=_demo_df, x="step", y="value")
ax.set_title("Line Plot")
plt.show()

---

# 2. Histogram
Use to see distribution, skewness, and outliers for a feature.

In [None]:
# Template for your own data: swap 'feature1' for a numeric column in df.
# sns.histplot(data=df, x="feature1", bins=30)

# Self-contained demo: ten sample values grouped into six bins.
_demo_vals = [
    1.2, 2.3, 2.9, 3.1, 3.8,
    4.0, 4.2, 5.5, 6.1, 6.8,
]
ax = sns.histplot(x=_demo_vals, bins=6)
ax.set_title("Histogram")
plt.show()

---

# 3. Scatter Plot
Use to check relationships (linear/non-linear) and class separation (with `hue`).

In [None]:
# Templates for your own data.
# Plain relationship between two features:
# sns.scatterplot(data=df, x="feature1", y="feature2")
# Colour the points by class label (supervised ML):
# sns.scatterplot(data=df, x="feature1", y="feature2", hue="target")

# Self-contained demo: six points where y rises along with x.
_demo_x = list(range(1, 7))
_demo_y = [2.1, 2.9, 3.7, 4.2, 5.1, 5.9]
ax = sns.scatterplot(x=_demo_x, y=_demo_y)
ax.set_title("Scatter Plot")
plt.show()

---

# 4. Box Plot
Use to compare distributions across categories/classes and detect outliers.

In [None]:
# Template for your own data: one box of feature1 per class label.
# sns.boxplot(data=df, x="target", y="feature1")

# Self-contained demo: two groups of seven values each.
_group_a = [10, 12, 13, 12, 11, 14, 30]  # 30 is deliberately far from the rest
_group_b = [8, 9, 10, 11, 9, 10, 12]
_demo = {
    "group": ["A"] * len(_group_a) + ["B"] * len(_group_b),
    "value": _group_a + _group_b,
}
ax = sns.boxplot(data=_demo, x="group", y="value")
ax.set_title("Box Plot")
plt.show()

---

# 5. Heatmap (Correlation)
Very common for ML EDA: find correlated features and redundancy.

In [None]:
# Template for your own data: correlation heatmap over df's numeric columns.
# corr = df.corr(numeric_only=True)
# sns.heatmap(corr, annot=True)
# plt.title("Correlation Heatmap")
# plt.show()

# Self-contained demo: a hand-written symmetric 3x3 matrix with 1.0 on the
# diagonal, standing in for a correlation matrix of features F1..F3.
matrix = [
    [1.0, 0.8, 0.2],
    [0.8, 1.0, 0.5],
    [0.2, 0.5, 1.0],
]
_labels = ["F1", "F2", "F3"]  # same labels on both axes
ax = sns.heatmap(matrix, annot=True, xticklabels=_labels, yticklabels=_labels)
ax.set_title("Heatmap")
plt.show()

---

# 6. Bar Plot
Use to compare category-wise averages (Seaborn defaults to mean).

In [None]:
# Template for your own data: bar height is mean(feature1) per class.
# sns.barplot(data=df, x="target", y="feature1")

# Self-contained demo: one value per class, so each bar shows that value.
_classes = ["Cat", "Dog", "Bird"]
_counts = [50, 35, 15]
_demo = {"class": _classes, "count": _counts}
ax = sns.barplot(data=_demo, x="class", y="count")
ax.set_title("Bar Plot")
plt.show()

---

# 7. Count Plot
Best for class imbalance checks (counts per category).

In [None]:
# Template for your own data: number of rows per class label.
# sns.countplot(data=df, x="target")

# Self-contained demo: an imbalanced label column (3 x A, 2 x B, 1 x C).
_demo = {"target": ["A"] * 3 + ["B"] * 2 + ["C"]}
ax = sns.countplot(data=_demo, x="target")
ax.set_title("Count Plot")
plt.show()

---

# 8. Pair Plot
Fast multi-feature visualization (good for small datasets like Iris).

In [None]:
# Template for your own data: set hue to your label column
# (seaborn's iris dataset calls it "species", not "target").
# sns.pairplot(df, hue="target")

# Demo on seaborn's bundled iris dataset, coloured by species.
iris = sns.load_dataset("iris")
sns.pairplot(data=iris, hue="species")
plt.show()