In [1]:
from bokeh.io import output_notebook

# Configure Bokeh to render plots inline in the notebook when show() is called.
output_notebook()

In [2]:
import pandas as pd

In [3]:
# Relative path to the Titanic CSV file.
file = "../data/csv_files/titanic_all.csv"

# Read the full table, then keep only the non-missing entries of the
# "age" column.
df = pd.read_csv(filepath_or_buffer=file)
ages = df["age"].dropna()

# The bare expression makes the series the cell's displayed output.
ages

0       29.00
1        2.00
2       30.00
3       25.00
4        0.92
        ...  
1308    27.00
1309    26.00
1310    22.00
1311    24.00
1312    29.00
Name: age, Length: 756, dtype: float64

In [4]:
from bokeh.plotting import figure, show
import numpy as np

In [9]:
# create a function to plot a histogram


def plot_hist(df, step, title, max_value=80):
    """
    Create a histogram plot using Bokeh.

    The data is counted in equal-width bins that start at 0. The last bin
    edge is the first multiple of ``step`` that is greater than or equal to
    ``max_value``, so the whole ``[0, max_value]`` range is always covered.
    Values outside the binned range are not counted.

    Args:
        df (array-like): One-dimensional numeric data to be plotted as a
            histogram (for example a list, NumPy array or pandas Series).
            Python and NumPy integers and floats are accepted.
        step (float): The width of each bin in the histogram. Must be a
            positive, finite number.
        title (str): The title of the plot.
        max_value (float, optional): Upper end of the range that must be
            covered by the bins. Must be a positive, finite number.
            Defaults to 80.

    Returns:
        The Bokeh figure created by ``figure(...)``, holding one ``quad``
        glyph per histogram bin.

    Raises:
        ValueError: If the input data is empty or contains non-numeric
            values, or if ``step`` or ``max_value`` is not a positive,
            finite number.
    """
    # data validation
    if len(df) == 0:
        raise ValueError("Input data is empty.")

    # NumPy integer scalars (e.g. the elements of an int64 array or Series)
    # are not instances of the built-in ``int``, so the NumPy abstract
    # scalar types are listed explicitly next to the Python ones.
    numeric_types = (int, float, np.integer, np.floating)
    if not all(isinstance(val, numeric_types) for val in df):
        raise ValueError(
            "Input data contains invalid values. Expected numerical values."
        )

    # A zero, negative, NaN or infinite bin width cannot produce usable
    # bin edges, so it is rejected up front.
    if not (np.isfinite(step) and step > 0):
        raise ValueError("step must be a positive, finite number.")
    if not (np.isfinite(max_value) and max_value > 0):
        raise ValueError("max_value must be a positive, finite number.")

    # Build the edges from a bin count rather than np.arange(0, max_value,
    # step): arange excludes its stop value, which would leave the top of
    # the range (e.g. 75-80 for step=5) without a bin.
    n_bins = int(np.ceil(max_value / step))
    bins = np.arange(n_bins + 1) * step
    hist, edges = np.histogram(df, bins=bins)

    p = figure(
        title=title,
        height=300,
        width=400,
        x_axis_label="age (years)",
        y_axis_label="count",
    )

    # One rectangle per bin: consecutive edges give the left/right sides,
    # the bin count gives the height.
    p.quad(
        top=hist,
        bottom=0,
        left=edges[:-1],
        right=edges[1:],
        fill_color="#66B2FF",
        line_color="white",
    )

    # x axis: start at zero, fixed ticks, no grid lines or axis line.
    p.x_range.start = 0
    p.xaxis.ticker = [0, 20, 40, 60]
    p.xgrid.grid_line_color = None
    p.xaxis.axis_line_color = None
    p.xaxis.major_tick_line_color = "gray"
    p.xaxis.major_tick_out = 2

    # y axis: start at zero, no axis line, tick marks reduced to zero length.
    p.y_range.start = 0
    p.yaxis.minor_tick_out = 0
    p.yaxis.axis_line_color = None
    p.yaxis.major_tick_line_color = "gray"
    p.yaxis.major_tick_out = 0
    p.yaxis.major_tick_in = 0

    return p

In [10]:
# Build one histogram of the ages with a bin width of 5, then render it.
single = plot_hist(df=ages, step=5, title="figure 7.1")

show(single)

In [11]:
from bokeh.layouts import gridplot

In [12]:
# Same data, four bin widths: each (step, title) pair yields one panel.
one, three, five, fifteen = (
    plot_hist(ages, bin_width, panel_title)
    for bin_width, panel_title in ((1, "a"), (3, "b"), (5, "c"), (15, "d"))
)

# Arrange the four panels in a 2x2 grid and render it.
layout = gridplot([[one, three], [five, fifteen]])

show(layout)