# Continuous Variables - Histogram

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

### Dataset: Heart Disease

In [None]:
# Read the heart-disease data from a CSV file in the working directory.
df = pd.read_csv("heart-disease.csv")

# Preview the first five rows of the table.
df.head(n=5)

---

# Histogram

In [None]:
# Single-letter codes for the basic built-in colors:
#   b: blue      g: green     r: red      c: cyan
#   m: magenta   y: yellow    k: black    w: white
fig, ax = plt.subplots()

# Histogram of the 'age' column; no `bins` is given, so the default of 10 applies.
ax.hist(
    df["age"],
    edgecolor="black",
    color="g",
);

### Set the number of bins to display

In [None]:
fig, ax = plt.subplots()

# An integer `bins` sets how many equal-width bins the age range is split into.
ax.hist(
    df["age"],
    bins=30,
    edgecolor="black",
    color="g",
);

### Auto-set the number of bins to display

In [None]:
fig, ax = plt.subplots()

# bins="auto" is one of the binning strategies of numpy.histogram_bin_edges:
# the number of bins is estimated from the data instead of being fixed by hand.
ax.hist(
    df["age"],
    bins="auto",
    edgecolor="black",
    color="g",
);

### Set a custom color

[Named Colors](https://matplotlib.org/stable/gallery/color/named_colors.html)

[HTML Color Codes](https://htmlcolorcodes.com)

In [None]:
fig, ax = plt.subplots()

# Bars filled with a custom color given as a hex code.
ax.hist(df["age"], edgecolor="black", color="#6399EB")

# Title and x-axis label set together in one call.
ax.set(title="Histogram", xlabel="Age");

---

## Style the chart

### Format the labels (increase font size)

In [None]:
fig, ax = plt.subplots()
ax.hist(df["age"], edgecolor="black", color="#9616FA")

# Enlarge the text of the x-axis label and of the title.
ax.set_xlabel("Age", fontsize=16)
ax.set_title("Histogram", fontsize=16);

### Customize the ticks

In [None]:
fig, ax = plt.subplots()
ax.hist(df["age"], edgecolor="black", color="#9616FA")

ax.set_xlabel("Age", fontsize=16)
ax.set_title("Histogram", fontsize=16)

# Style the tick marks themselves (not their labels): color, thickness, length.
ax.tick_params(
    length=5,
    width=3,
    color="purple",
);

### Customize the tick labels (color and size)

In [None]:
fig, ax = plt.subplots()
ax.hist(df["age"], edgecolor="black", color="#9616FA")

ax.set_xlabel("Age", fontsize=16)
ax.set_title("Histogram", fontsize=16)

# Style the text next to the ticks: font size and color of the tick labels.
ax.tick_params(
    labelcolor="purple",
    labelsize=16,
);

### Customize only a specific axis

In [None]:
fig, ax = plt.subplots()
ax.hist(df["age"], edgecolor="black", color="#9616FA")

ax.set_xlabel("Age", fontsize=16)
ax.set_title("Histogram", fontsize=16)

# axis="y" limits the styling to the y-axis; the x-axis ticks keep their defaults.
ax.tick_params(
    axis="y",
    # tick marks
    color="purple",
    width=3,
    length=5,
    # tick labels
    labelcolor="purple",
    labelsize=16,
);

---

## Continuous x categorical (age x gender)

### Create paired histograms

In [None]:
# Split the ages into one series per value of the 'sex' column.
female_ages = df.loc[df["sex"].eq("female"), "age"]
male_ages = df.loc[df["sex"].eq("male"), "age"]

fig, ax = plt.subplots()

# A list of datasets gives a paired histogram: one bar per dataset in each bin,
# colored in the same order as the datasets are listed.
ax.hist(
    [female_ages, male_ages],
    edgecolor="black",
    color=["pink", "cornflowerblue"],
)

ax.set_title("Histogram")
ax.set_xlabel("Age");

### Add a legend

In [None]:
fig, ax = plt.subplots()

# The data
females_age = df.loc[df['sex']=="female", "age"]
males_age = df.loc[df['sex']=="male", "age"]

# Name each dataset where it is plotted: `label` takes one string per dataset,
# so every legend entry is tied to its own series instead of relying on the
# order of a separate list handed to the legend call.
ax.hist(
    [females_age, males_age],
    color=['pink', "cornflowerblue"],
    edgecolor='black',
    label=["female", "male"],
)

ax.set(xlabel="Age", title="Histogram")

ax.legend();  # <-- display a legend (built from the labels given to hist)

### Create a stacked histogram

In [None]:
fig, ax = plt.subplots()

# The data
females_age = df.loc[df['sex']=="female", "age"]
males_age = df.loc[df['sex']=="male", "age"]

# stacked=True piles the datasets on top of each other instead of placing them
# side by side: the first dataset (female) is on the bottom, the second (male)
# on top. `label` names each dataset so the legend entries stay tied to their
# own series rather than to the order of a separate list.
ax.hist(
    [females_age, males_age],
    color=["pink", 'cornflowerblue'],
    edgecolor="black",
    stacked=True,
    label=["female", "male"],
)

ax.set(xlabel="Age", title="Histogram")

ax.legend();

---