<a href="https://colab.research.google.com/github/isjogren/ai-wip/blob/main/junk/Data_Visualization_(Matplotlib_Seaborn).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Python Data Visualization (Matplotlib/Seaborn)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

### Dataset: Heart Disease

In [None]:
# Read the heart-disease CSV (path is relative to the working directory)
df = pd.read_csv(filepath_or_buffer="heart-disease.csv")

# Preview the first five rows
df.head(n=5)

---

# Matplotlib

## Figures and Axes

In [None]:
# plt.subplots() hands back a (Figure, Axes) pair;
# a 1 x 1 grid yields a single Axes object
fig, ax = plt.subplots(nrows=1, ncols=1)

---

# Line Plot

In [None]:
# Only y-values are supplied, so the x-axis falls back to the row index
fig, ax = plt.subplots()
ax.plot(df.loc[:, "max_hr"]);

## Create multiple plots

### Separate rows

In [None]:
# plt.subplots(nrows, ncols) builds a grid of Axes inside one figure.
# Here: 2 rows, so each plot is drawn in its own Axes (axes[0], axes[1]).
fig, axes = plt.subplots(nrows=2)

axes[0].plot(df.loc[:, "max_hr"])
axes[1].plot(df.loc[:, "age"]);

### Separate columns

In [None]:
# plt.subplots(nrows, ncols) builds a grid of Axes inside one figure.
# Here: 1 row and 2 columns, so the two plots sit side by side.
fig, axes = plt.subplots(nrows=1, ncols=2)

axes[0].plot(df.loc[:, "max_hr"])
axes[1].plot(df.loc[:, "age"]);

## Set the figsize

In [None]:
# figsize=(width, height) controls the overall size of the figure
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 5))

axes[0].plot(df.loc[:, "max_hr"])
axes[1].plot(df.loc[:, "age"]);

## Set properties

In [None]:
fig, ax = plt.subplots()
ax.plot(df.loc[:, "max_hr"])

# The more flexible route: one dedicated setter call per property
ax.set_ylabel("Max HR")
ax.set_xlabel("Row Index")
ax.set_title("Line Plot");

In [None]:
fig, ax = plt.subplots()
ax.plot(df.loc[:, "max_hr"])

# The more convenient route: a single ax.set() call with one keyword per property
ax.set(
    title="Line Plot",
    xlabel="Row Index",
    ylabel="Max HR",
);

---

# Histogram

In [None]:
fig, ax = plt.subplots()

# Histogram of the age column: green bars with black outlines
ax.hist(x=df["age"], color="green", edgecolor="black")

ax.set(title="Histogram", xlabel="Age");

### Set the number of bins to display

In [None]:
fig, ax = plt.subplots()

# Same histogram as before, but with the number of bins set to 30
ax.hist(x=df["age"], bins=30, color="green", edgecolor="black")

ax.set(title="Histogram", xlabel="Age");

---

# Scatter Plot

In [None]:
fig, ax = plt.subplots()

# x / y: point positions, alpha: transparency, s: marker size, c: color
ax.scatter(
    x=df["age"],
    y=df["max_hr"],
    alpha=0.3,
    s=100,
    c="black",
)

# Title and axis labels
ax.set(title="Heart Disease", xlabel="Age", ylabel="Max HR");

### Auto-set the colors based on class (target) membership
The target variable must be numeric (e.g., integer class labels) so that Matplotlib can map each value to a color.

In [None]:
fig, ax = plt.subplots()

# Passing the heart_disease column as `c` colors each point by its class value
ax.scatter(
    x=df["age"],
    y=df["max_hr"],
    alpha=0.3,
    s=200,
    c=df["heart_disease"],
)

ax.set(title="Heart Disease", xlabel="Age", ylabel="Max HR");

### Set the marker (shape)

In [None]:
fig, ax = plt.subplots()

# Example marker codes: "o" (default), "v", "^", "x", "P", "d", "."
ax.scatter(
    x=df["age"],
    y=df["max_hr"],
    alpha=0.3,
    s=200,
    c=df["heart_disease"],
    marker="o",
)

ax.set(title="Heart Disease", xlabel="Age", ylabel="Max HR");

---

---

# Seaborn

In [None]:
import seaborn as sns

---

# Line Plot

In [None]:
# Seaborn line plot of max_hr against age, with columns referenced by name
sns.lineplot(
    data=df,
    x="age",
    y="max_hr",
);

## Style by categorical feature

In [None]:
# Same line plot, with a separate color (hue) and line style for each value of sex
sns.lineplot(
    data=df,
    x="age",
    y="max_hr",
    hue="sex",
    style="sex",
);

# Relation Plot
Allows a line plot to be split into separate subplots, one for each value of an additional categorical variable.

In [None]:
# kind="line" draws line plots; col="heart_disease" gives each
# heart_disease value its own column of the figure
sns.relplot(
    data=df,
    x="age",
    y="max_hr",
    hue="sex",
    style="sex",
    kind="line",
    col="heart_disease",
);

---

# Box Plot

In [None]:
fig, ax = plt.subplots()

# Box plot of max_hr for each category of sex (x is a categorical variable).
# ax=ax draws onto the Axes created above explicitly, instead of relying on
# pyplot's implicit "current axes", so the set_title() call below is
# guaranteed to target the same plot.
# NOTE(review): recent seaborn releases (>= 0.13) reportedly warn when
# `palette` is given without `hue`; if that warning appears, add
# hue="sex", legend=False — confirm against the installed version.
sns.boxplot(data=df, x="sex", y="max_hr", palette="Set1", ax=ax)

ax.set_title("Box Plot");

---

# Joint Plot

In [None]:
# Joint plot of age vs. max_hr using the "reg" kind
sns.jointplot(
    data=df,
    x="age",
    y="max_hr",
    kind="reg",
);

---

# Pair Plot

Pairwise feature comparisons

In [None]:
# Keep a subset of columns: five features plus the heart_disease class column
df2 = df.loc[
    :,
    [
        "age",
        "rest_bp",
        "chol",
        "max_hr",
        "st_depr",
        "heart_disease",
    ],
]

In [None]:
# Pairwise comparisons over the reduced column set (df2) rather than the full
# DataFrame: this keeps the grid small and readable. Points are colored by
# heart_disease, which df2 includes for exactly this purpose.
sns.pairplot(data=df2, hue="heart_disease");

---

# Heatmap

### View correlation matrix

In [None]:
# Pairwise correlation matrix of the df2 columns (Pearson is the pandas default)
df2.corr(method="pearson")

In [None]:
# Heatmap of the df2 correlation matrix; annot=True writes each value in its cell
sns.heatmap(
    data=df2.corr(),
    cmap="Blues",
    annot=True,
);