# Bar plots

This page demonstrates how to recreate the bar plots found in the "visualizing amounts" chapter of the book, [*Fundamentals of data visualization*](https://clauswilke.com/dataviz/visualizing-amounts.html).

A summary of the chapter can be found in our blog post, [here](blog post link).

## Data preparation

The data used for the plots in this notebook can be found in our GitHub [repository](https://github.com/bokeh/dataviz-fundamentals/tree/main/data). Here, we use the pandas library to parse the data.

In [1]:
# Import the libraries used throughout this notebook:
# pandas parses the data files, bokeh.io controls where plots are displayed.
import pandas as pd
from bokeh.io import output_notebook

# Tell Bokeh to render plots inline in the notebook.
output_notebook()

## Vertical and Horizontal bar plots


In [2]:
# load the weekend box-office data
file = "../data/movies.csv"
df = pd.read_csv(file)

# keep only the part of each title before the first colon, to fit book style
df["Title"] = df["Title"].str.split(":").str[0]

# take the digits after the "$" sign and express the gross in millions of USD
# to save space on the y-axis
gross_usd = df["Weekend gross"].str.split("$").str[1].astype(int)
df["Weekend gross"] = gross_usd / 1_000_000

df

Unnamed: 0,Rank,Title,Weekend gross
0,1,Star Wars,71.565498
1,2,Jumanji,36.169328
2,3,Pitch Perfect 3,19.928525
3,4,The Greatest Showman,8.805843
4,5,Ferdinand,7.316746


In [3]:
from bokeh.plotting import figure, show
from bokeh.models import FactorRange

# Figure 6.1: vertical bars showing the weekend gross of each movie

# categorical x-axis range with one factor per movie title
title_range = FactorRange(factors=df["Title"])

# create the figure object that hosts the bars
p1 = figure(
    title="Figure 6.1",  # plot title
    width=600,  # plot width
    height=300,  # plot height
    x_range=title_range,  # categorical range for the x-axis
    y_axis_label="weekend gross (million USD)",  # y-axis label
)

# vbar() draws one vertical bar per row of the data source
p1.vbar(
    source=df,  # dataframe that supplies the columns named below
    x="Title",  # column with the x-axis categories
    top="Weekend gross",  # column with the bar heights
    width=0.7,  # bar width
    color="#66B2FF",  # hex code for the bar color
)

# Customize plot
p1.y_range.start = 0  # start y-axis range from 0
p1.xaxis.major_tick_out = 0  # no outward x-axis major tick marks
p1.yaxis.major_tick_out = 0  # no outward y-axis major tick marks
p1.yaxis.minor_tick_out = 0  # no outward y-axis minor tick marks

show(p1)  # display plot

In [4]:
# Figure 6.3: the same data drawn as horizontal bars

# create the figure object; the movie titles form the categorical y-axis
p2 = figure(
    title="Figure 6.3",
    height=300,
    sizing_mode="stretch_width",  # make plot width responsive to size of screen
    y_range=df["Title"],
    x_axis_label="weekend gross (million USD)",
)

# hbar() draws one horizontal bar per row of the data source
p2.hbar(
    source=df,  # dataframe that supplies the columns named below
    y="Title",  # column with the y-axis categories
    right="Weekend gross",  # column with the right endpoints of the bars
    height=0.8,  # bar height
    color="#66B2FF",  # hex code for the bar color
)

# customize plot
p2.x_range.start = 0  # start x-axis range from 0
p2.yaxis.major_tick_out = 0  # no outward y-axis major tick marks
p2.xaxis.major_tick_out = 0  # no outward x-axis major tick marks
p2.xaxis.minor_tick_out = 0  # no outward x-axis minor tick marks

show(p2)  # display plot

**Figure 6.1** and **Figure 6.3** represent the highest grossing movies for the weekend of December 22-24, 2017.

The `vbar()` and `hbar()` methods of the Bokeh [Figure](https://docs.bokeh.org/en/latest/docs/reference/plotting/figure.html#figure) class are used to create the vertical and horizontal bar plots respectively.

Other optional parameters you can use to further customise the plots include:

- alpha 
- line_width
- legend_label
- etc.


## Grouped and stacked bar plots

### A. Grouped bars

In [5]:
# load the median income data (tab-separated file)
file = "../data/income_by_age.tsv"
df = pd.read_table(file)

# sort values by race and age, then renumber the rows so that the positional
# slice below operates on the sorted order.
# rows 7-34 (28 rows) are kept; judging by the table printed below, these are
# the 7 age groups for each of asian, black, hispanic and white.
# NOTE(review): the 7 rows skipped at the top presumably belong to a race label
# that sorts before "asian" - confirm against the data file.
df = (
    df.sort_values(["race", "age"])
    .reset_index(drop=True)
    .iloc[7:35, :]
    .reset_index(drop=True)
)
# group dataframe by age and race and sum the median income within each group
age_group = df.groupby(["age", "race"])[["median_income"]].sum()

# unstack() moves the race index level into the columns, which leaves two
# column levels. After reset_index() the age column carries an empty label on
# the lower level; renaming "" to "age" keeps that column called "age" once
# the higher level is dropped.
age_group = age_group.unstack().reset_index().rename(columns={"": "age"})
age_group.columns = age_group.columns.droplevel()

age_group

race,age,asian,black,hispanic,white
0,15 to 24,45809,30267,45080,44588
1,25 to 34,80098,39176,45876,65389
2,35 to 44,100443,49336,50245,78093
3,45 to 54,98925,50103,58103,82289
4,55 to 64,91193,40363,51996,69387
5,65 to 74,56646,28697,36704,52219
6,> 74,26487,22302,23797,32203


In [6]:
from bokeh.models import NumeralTickFormatter as NTF
from bokeh.palettes import Blues5
from bokeh.transform import dodge

# create figure object with a categorical x-axis: one factor per age group
p = figure(
    title="Figure 6.7",
    x_range=FactorRange(factors=age_group["age"]),
    x_axis_label="age (years)",
    y_axis_label="median income (USD)",
    height=400,
    sizing_mode="stretch_width",
)

bar_width = 0.2  # bar width for all plot bars

# one entry per category of bars:
# (data column, offset from the age group position, bar color, legend label)
bar_specs = [
    ("asian", -0.3, Blues5[0], "Asian"),
    ("white", -0.1, Blues5[1], "White"),
    ("hispanic", 0.1, Blues5[2], "Hispanic"),
    ("black", 0.3, Blues5[3], "Black"),
]

# call vbar() once per category; dodge() offsets each category of bars
# so that they sit side by side instead of overlapping
for column, offset, bar_color, label in bar_specs:
    p.vbar(
        x=dodge("age", offset, range=p.x_range),
        top=column,
        source=age_group,
        width=bar_width,
        color=bar_color,
        legend_label=label,
    )

# customize plot
# hide the x-axis tick marks, the vertical grid lines and the x-axis line
p.xaxis.major_tick_out = 0
p.xaxis.major_tick_line_color = None
p.xaxis.axis_line_color = None
p.xgrid.grid_line_color = None

# hide the outward y-axis tick marks and the y-axis line
p.yaxis.major_tick_out = 0
p.yaxis.minor_tick_out = 0
p.yaxis.axis_line_color = None

# fix the y-axis range
p.y_range.start = 0
p.y_range.end = 100_000

# format the y-axis values as $ amounts with thousand delimiters
p.yaxis.formatter = NTF(format="$0,0")

show(p)

The `vbar()` method is called once for each category of bars you want to plot.

The [dodge](https://docs.bokeh.org/en/latest/docs/reference/transform.html#bokeh.transform.dodge) transform is used to offset the bars of each category so that they don't overlap.


In [7]:
# create function used to plot the vbars.
def plot_bars(df: pd.DataFrame) -> figure:
    """
    Create a bar chart of median income by age group.

    The caller's DataFrame is left untouched: the plot is built from an
    internal copy.

    Parameters:
        df (pd.DataFrame): The pandas DataFrame containing the data.
            It should have the following columns:
            - age: Labels of the age groups. Numeric values are converted to
              strings, because a categorical axis is built from string factors.
            - median_income: Numeric values representing the median income
              for each age group.
            If a value was assigned to ``df.name``, it is appended to the
            plot title.

    Returns:
        figure: A Bokeh figure object representing the bar chart.

    Raises:
        ValueError: If the required columns are not present in the DataFrame.
        TypeError: If the 'median_income' column is not numeric.

    Example:
        df = pd.DataFrame(
            {"age": ["15-24", "25-34", "35-44"], "median_income": [50000, 60000, 70000]}
        )
        df.name = "Example"
        plot = plot_bars(df)
        show(plot)
    """
    # Data validation
    if "age" not in df.columns or "median_income" not in df.columns:
        raise ValueError("The DataFrame must have 'age' and 'median_income' columns.")

    if not pd.api.types.is_numeric_dtype(df["median_income"]):
        raise TypeError("The 'median_income' column must contain numeric values.")

    # The name is optional. Read it from the caller's frame, because custom
    # attributes are not carried over to copies. A pandas object here would
    # mean "name" resolved to a column rather than to a label.
    label = getattr(df, "name", None)
    if label is None or isinstance(label, (pd.Series, pd.DataFrame)):
        title = "Figure 6.9"
    else:
        title = f"Figure 6.9: {label}"

    # Work on a copy so the caller's DataFrame is never modified.
    data = df.copy()
    ages = data["age"]
    if pd.api.types.is_numeric_dtype(ages):
        ages = ages.astype(str)

    # Unique age labels in order of first appearance; these define both the
    # x-axis factors and the category order of the column.
    factors = ages.unique().tolist()
    data["age"] = pd.Categorical(ages, categories=factors, ordered=True)

    # Function implementation
    p = figure(
        title=title,
        height=300,
        width=300,
        x_range=FactorRange(factors=factors),
        toolbar_location=None,
    )

    p.vbar(x="age", top="median_income", color="#99CCFF", source=data, width=0.9)

    p.xgrid.grid_line_color = None
    p.xaxis.major_tick_out = 0
    p.xaxis.axis_label = "Age (years)"
    p.yaxis.axis_label = "Median income (USD)"
    p.yaxis.formatter = NTF(format="$0,0")
    p.yaxis.minor_tick_out = 0
    p.yaxis.major_tick_out = 0
    p.y_range.start = 0
    p.y_range.end = 110_000

    return p

In [8]:
# shorten the age labels, e.g. "15 to 24" becomes "15-24"
df["age"] = df["age"].str.replace(" to ", "-")

# columns that are not needed in the per-race dataframes
extra_columns = ["year", "race"]

# split the dataframe into one block of rows per race category
# and attach a name to each resulting dataframe
asian = df.iloc[:7].drop(columns=extra_columns)
asian.name = "Asian"

black = df.iloc[7:14].drop(columns=extra_columns)
black.name = "Black"

hispanic = df.iloc[14:21].drop(columns=extra_columns)
hispanic.name = "Hispanic"

white = df.iloc[21:].drop(columns=extra_columns)
white.name = "White"

In [9]:
# plot and render bar charts in a grid layout
from bokeh.layouts import gridplot

# the four per-race dataframes, in the order they fill the grid
races = [asian, white, hispanic, black]

# create one bar plot per race
plots = [plot_bars(race) for race in races]

# use gridplot to create a 2x2 layout of the plots:
# the first two plots form the top row, the last two the bottom row
layout = gridplot([plots[:2], plots[-2:]])

show(layout)

**Figure 6.7** and **Figure 6.9** represent the 2016 median U.S. annual household income versus age group and race.

### B. Stacked bars

In [10]:
# load the Titanic passenger data (tab-separated file)
file = "../data/titanic_all.tsv"
titanic = pd.read_table(file)

# count the passengers of each sex per class:
# one row per class, one column per sex
sex_by_class = titanic.groupby("class")["sex"].value_counts().unstack()

# drop the row labelled "*" and give the remaining rows readable class names
t_class = sex_by_class.drop("*", axis=0)
t_class.index = ["1st class", "2nd class", "3rd class"]

# move the class names out of the index into a regular "class" column
t_class = t_class.reset_index().rename(columns={"index": "class"})

t_class

sex,class,female,male
0,1st class,143.0,179.0
1,2nd class,107.0,172.0
2,3rd class,212.0,499.0


In [11]:
# RGB tuple color codes for the bars, in the same order as the stacked columns
bar_colors = [(0, 102, 204), (204, 102, 0)]

# create figure object with one x-axis category per passenger class
p = figure(
    title="Figure 6.10",
    x_range=FactorRange(*t_class["class"]),
    height=300,
    sizing_mode="stretch_width",
    toolbar_location=None,
)

# plot stacked bars
p.vbar_stack(
    ["male", "female"],  # column names of bars to stack
    x="class",  # x-axis column name
    source=t_class,  # data source for column names
    width=0.9,  # bar width
    color=bar_colors,  # one color per stacked column
    line_color="white",  # line color separating the bars
    line_width=2.5,  # line width separating the bars
    legend_label=["male passengers", "female passengers"],
)

# Add text in the bars.
# Both labels are anchored at the "male" value of each class and then moved
# vertically by their own offset: (column holding the text, vertical offset)
for text_column, offset in (("male", 40), ("female", -10)):
    p.text(
        x="class",  # column name representing x-axis location of the text
        y="male",  # column name representing y-axis location of the text
        text=text_column,  # column name of text value
        text_align="center",  # horizontal position of text
        text_color="white",  # text color
        y_offset=offset,  # vertical offset of text from initial position
        source=t_class,  # data source of column names
    )

# plot customization
p.y_range.start = 0
p.yaxis.visible = False
p.xaxis.major_tick_out = 0
p.xaxis.axis_line_color = None
p.xaxis.axis_line_width = 0
p.grid.grid_line_color = None
p.outline_line_color = None
p.legend.orientation = "horizontal"
p.legend.location = "top_left"

show(p)

The [vbar_stack](https://docs.bokeh.org/en/latest/docs/reference/plotting/figure.html#bokeh.plotting.figure.vbar_stack) method of the Bokeh `figure` class is used to create stacked bars from bottom to top.