diff --git a/benches/benchmarks/custom_measurement.rs b/benches/benchmarks/custom_measurement.rs
index 6d0d5e6ac..aac81276c 100644
--- a/benches/benchmarks/custom_measurement.rs
+++ b/benches/benchmarks/custom_measurement.rs
@@ -21,7 +21,7 @@ impl ValueFormatter for HalfSecFormatter {
         }
     }
 
-    fn scale_for_graph(&self, ns: f64, values: &mut [f64]) -> &'static str {
+    fn scale_values(&self, _typical: f64, values: &mut [f64]) -> &'static str {
         for val in values {
             *val *= 2f64 * 10f64.powi(-9);
         }
@@ -29,6 +29,30 @@ impl ValueFormatter for HalfSecFormatter {
         "s/2"
     }
 
+    /// Turns each measured value into a throughput figure, in place, and returns the unit label.
+    ///
+    /// Each value `v` becomes `count / (v * 2f64 * 10f64.powi(-9))`, where `count` is the byte
+    /// or element count carried by `throughput`. The typical value is ignored because the unit
+    /// label depends only on the throughput variant.
+    fn scale_throughputs(
+        &self,
+        _typical: f64,
+        throughput: &Throughput,
+        values: &mut [f64],
+    ) -> &'static str {
+        // Select the numerator and the unit label once, then share one conversion loop.
+        let (count, unit) = match *throughput {
+            Throughput::Bytes(bytes) => (bytes as f64, "b/s/2"),
+            Throughput::Elements(elems) => (elems as f64, "elem/s/2"),
+        };
+
+        for val in values {
+            *val = count / (*val * 2f64 * 10f64.powi(-9));
+        }
+
+        unit
+    }
+
     fn scale_for_machines(&self, values: &mut [f64]) -> &'static str {
         for val in values {
             *val *= 2f64 * 10f64.powi(-9);
diff --git a/book/src/user_guide/custom_measurements.md b/book/src/user_guide/custom_measurements.md
index 38368e4ee..03e174465 100644
--- a/book/src/user_guide/custom_measurements.md
+++ b/book/src/user_guide/custom_measurements.md
@@ -94,9 +94,10 @@ The next trait is `ValueFormatter`, which defines how a measurement is displayed
 
 ```rust
 pub trait ValueFormatter {
-    fn format_value(&self, value: f64) -> String;
-    fn format_throughput(&self, throughput: &Throughput, value: f64) -> String;
-    fn scale_for_graph(&self, typical_value: f64, values: &mut[f64]) -> (&'static str);
+    fn format_value(&self, value: f64) -> String {...}
+    fn format_throughput(&self, throughput: &Throughput, value: f64) -> String {...}
+    fn scale_values(&self, typical_value: f64, values: &mut [f64]) -> &'static str;
+    fn scale_throughputs(&self, typical_value: f64, throughput: &Throughput, values: &mut [f64]) -> &'static str;
     fn scale_for_machines(&self, values: &mut [f64]) -> &'static str;
 }
 ```
@@ -112,7 +113,7 @@ Implementors should try to format the values in a way that will make sense to hu
 prefixes to simplify the numbers. An easy way to do this is to have a series of conditionals like so:
 
 ```rust
-if ns < 1.0 {  // ns = time in nanoseconds
+if ns < 1.0 {  // ns = time in nanoseconds per iteration
     format!("{:>6} ps", ns * 1e3)
 } else if ns < 10f64.powi(3) {
     format!("{:>6} ns", ns)
@@ -128,24 +129,26 @@ if ns < 1.0 {  // ns = time in nanoseconds
 It's also a good idea to limit the amount of precision in floating-point output - after a few
 digits the numbers don't matter much anymore but add a lot of visual noise and make the results
 harder to interpret. For example, it's very unlikely that anyone cares about the difference between
-`10.2896653s` and `10.2896654s` - it's much more salient that their function takes "about 10.3
+`10.2896653s` and `10.2896654s` - it's much more salient that their function takes "about 10.290
 seconds per iteration".
 
 With that out of the way, `format_value` is pretty straightforward. `format_throughput` is also not
 too difficult; match on `Throughput::Bytes` or `Throughput::Elements` and generate an appropriate
 description. For wall-clock time, that would likely take the form of "bytes per second", but a
 measurement that read CPU performance counters might want to display throughput in terms of "cycles
-per byte".
-
-`scale_for_graph` is a bit more complex. This is primarily used for plotting. This accepts a
-"typical" value chosen by Criterion.rs, and a mutable slice of values to scale. This function
-should choose an appropriate unit based on the typical value, and convert all values in the slice
-to that unit. It should also return a string representing the chosen unit. So, for our wall-clock
-times where the measured values are in nanoseconds, if we wanted to display plots in milliseconds
-we would multiply all of the input values by `10.0f64.powi(-6)` and return `"ms"`, because
-multiplying a value in nanoseconds by 10^-6 gives a value in milliseconds.
-
-`scale_for_machines` is similar to `scale_for_graph`, except that it's used for generating
+per byte". Note that default implementations of `format_value` and `format_throughput` are provided
+which use `scale_values` and `scale_throughputs`, but you can override them if you wish.
+
+`scale_values` is a bit more complex. This accepts a "typical" value chosen by Criterion.rs, and a
+mutable slice of values to scale. This function should choose an appropriate unit based on the
+typical value, and convert all values in the slice to that unit. It should also return a string
+representing the chosen unit. So, for our wall-clock times where the measured values are in
+nanoseconds, if we wanted to display plots in milliseconds we would multiply all of the input
+values by `10.0f64.powi(-6)` and return `"ms"`, because multiplying a value in nanoseconds by 10^-6
+gives a value in milliseconds. `scale_throughputs` works the same way, except it first converts
+each measured value into a throughput (based on the given `Throughput`) and then scales those.
+
+`scale_for_machines` is similar to `scale_values`, except that it's used for generating
 machine-readable outputs. It does not accept a typical value, because this function should always
 return values in the same unit.
 
@@ -172,7 +175,7 @@ impl ValueFormatter for HalfSecFormatter {
         }
     }
 
-    fn scale_for_graph(&self, ns: f64, values: &mut [f64]) -> &'static str {
+    fn scale_values(&self, ns: f64, values: &mut [f64]) -> &'static str {
         for val in values {
             *val *= 2f64 * 10f64.powi(-9);
         }
@@ -180,6 +183,31 @@ impl ValueFormatter for HalfSecFormatter {
         "s/2"
     }
 
+    /// Turns each measured value into a throughput figure, in place, and returns the unit label.
+    ///
+    /// Each value `v` becomes `count / (v * 2f64 * 10f64.powi(-9))`, where `count` is the byte
+    /// or element count carried by `throughput`. The typical value is ignored because the unit
+    /// label depends only on the throughput variant.
+    fn scale_throughputs(
+        &self,
+        _typical: f64,
+        throughput: &Throughput,
+        values: &mut [f64],
+    ) -> &'static str {
+        // Select the numerator and the unit label once, then share one conversion loop.
+        let (count, unit) = match *throughput {
+            Throughput::Bytes(bytes) => (bytes as f64, "b/s/2"),
+            Throughput::Elements(elems) => (elems as f64, "elem/s/2"),
+        };
+
+        for val in values {
+            // Convert nanoseconds/iteration to (bytes or elements)/half-second.
+            *val = count / (*val * 2f64 * 10f64.powi(-9));
+        }
+
+        unit
+    }
+
     fn scale_for_machines(&self, values: &mut [f64]) -> &'static str {
         // Convert values in nanoseconds to half-seconds.
         for val in values {
diff --git a/src/measurement.rs b/src/measurement.rs
index 5d77163db..bec7e79e3 100644
--- a/src/measurement.rs
+++ b/src/measurement.rs
@@ -20,18 +20,41 @@ use std::time::{Duration, Instant};
 /// of the elapsed time in nanoseconds.
 pub trait ValueFormatter {
     /// Format the value (with appropriate unit) and return it as a string.
-    fn format_value(&self, value: f64) -> String;
+    fn format_value(&self, value: f64) -> String {
+        let mut values = [value]; // one-element buffer that `scale_values` rewrites in place
+        let unit = self.scale_values(value, &mut values); // the value is its own typical value
+        format!("{:>6} {}", short(values[0]), unit) // right-aligned number, then the unit
+    }
 
     /// Format the value as a throughput measurement. The value represents the measurement value;
     /// the implementor will have to calculate bytes per second, iterations per cycle, etc.
-    fn format_throughput(&self, throughput: &Throughput, value: f64) -> String;
+    fn format_throughput(&self, throughput: &Throughput, value: f64) -> String {
+        let mut values = [value]; // one-element buffer that `scale_throughputs` rewrites in place
+        let unit = self.scale_throughputs(value, throughput, &mut values); // value is also typical
+        format!("{:>6} {}", short(values[0]), unit) // right-aligned number, then the unit
+    }
+
+    /// Scale the given values in place to some appropriate unit and return the unit string.
+    ///
+    /// The given typical value should be used to choose the unit. This function may be called
+    /// multiple times with different datasets; the typical value will remain the same to ensure
+    /// that the units remain consistent within a graph. The typical value will not be NaN.
+    /// Values will not contain NaN as input, and the transformed values must not contain NaN.
+    fn scale_values(&self, typical_value: f64, values: &mut [f64]) -> &'static str;
 
-    /// Scale the given values and return an appropriate unit string.
+    /// Convert the given measured values into throughput numbers based on the given throughput
+    /// value, scale them to some appropriate unit, and return the unit string.
     ///
     /// The given typical value should be used to choose the unit. This function may be called
     /// multiple times with different datasets; the typical value will remain the same to ensure
     /// that the units remain consistent within a graph. The typical value will not be NaN.
-    fn scale_for_graph(&self, typical_value: f64, values: &mut [f64]) -> (&'static str);
+    /// Values will not contain NaN as input, and the transformed values must not contain NaN.
+    fn scale_throughputs(
+        &self,
+        typical_value: f64,
+        throughput: &Throughput,
+        values: &mut [f64],
+    ) -> &'static str;
 
     /// Scale the values and return a unit string designed for machines.
     ///
@@ -82,52 +105,60 @@ pub trait Measurement {
 
 pub(crate) struct DurationFormatter;
 impl DurationFormatter {
-    fn bytes_per_second(&self, bytes_per_second: f64) -> String {
-        if bytes_per_second < 1024.0 {
-            format!("{:>6}   B/s", short(bytes_per_second))
+    fn bytes_per_second(&self, bytes: f64, typical: f64, values: &mut [f64]) -> &'static str {
+        let bytes_per_second = bytes * (1e9 / typical); // typical rate, used only to pick the unit
+        let (denominator, unit) = if bytes_per_second < 1024.0 {
+            (1.0, "  B/s") // label padded to the same width as the prefixed units
         } else if bytes_per_second < 1024.0 * 1024.0 {
-            format!("{:>6} KiB/s", short(bytes_per_second / 1024.0))
+            (1024.0, "KiB/s")
         } else if bytes_per_second < 1024.0 * 1024.0 * 1024.0 {
-            format!("{:>6} MiB/s", short(bytes_per_second / (1024.0 * 1024.0)))
+            (1024.0 * 1024.0, "MiB/s")
         } else {
-            format!(
-                "{:>6} GiB/s",
-                short(bytes_per_second / (1024.0 * 1024.0 * 1024.0))
-            )
+            (1024.0 * 1024.0 * 1024.0, "GiB/s")
+        };
+
+        for val in values { // every value gets the same divisor, so all share one unit
+            let bytes_per_second = bytes * (1e9 / *val); // shadows the typical-value rate above
+            *val = bytes_per_second / denominator; // overwrite the measurement in place
         }
+
+        unit
     }
 
-    fn elements_per_second(&self, elements_per_second: f64) -> String {
-        if elements_per_second < 1000.0 {
-            format!("{:>6}  elem/s", short(elements_per_second))
-        } else if elements_per_second < 1000.0 * 1000.0 {
-            format!("{:>6} Kelem/s", short(elements_per_second / 1000.0))
-        } else if elements_per_second < 1000.0 * 1000.0 * 1000.0 {
-            format!(
-                "{:>6} Melem/s",
-                short(elements_per_second / (1000.0 * 1000.0))
-            )
+    fn elements_per_second(&self, elems: f64, typical: f64, values: &mut [f64]) -> &'static str {
+        let elems_per_second = elems * (1e9 / typical); // typical rate, used only to pick the unit
+        let (denominator, unit) = if elems_per_second < 1000.0 {
+            (1.0, " elem/s") // label padded to the same width as the prefixed units
+        } else if elems_per_second < 1000.0 * 1000.0 {
+            (1000.0, "Kelem/s")
+        } else if elems_per_second < 1000.0 * 1000.0 * 1000.0 {
+            (1000.0 * 1000.0, "Melem/s")
         } else {
-            format!(
-                "{:>6} Gelem/s",
-                short(elements_per_second / (1000.0 * 1000.0 * 1000.0))
-            )
+            (1000.0 * 1000.0 * 1000.0, "Gelem/s")
+        };
+
+        for val in values { // every value gets the same divisor, so all share one unit
+            let elems_per_second = elems * (1e9 / *val); // shadows the typical-value rate above
+            *val = elems_per_second / denominator; // overwrite the measurement in place
         }
+
+        unit
     }
 }
 impl ValueFormatter for DurationFormatter {
-    fn format_value(&self, ns: f64) -> String {
-        crate::format::time(ns)
-    }
-
-    fn format_throughput(&self, throughput: &Throughput, ns: f64) -> String {
+    fn scale_throughputs(
+        &self,
+        typical: f64, // forwarded to the helper, which uses it to choose the unit
+        throughput: &Throughput,
+        values: &mut [f64], // rewritten in place by the helper as scaled throughputs
+    ) -> &'static str {
         match *throughput {
-            Throughput::Bytes(bytes) => self.bytes_per_second((bytes as f64) * (1e9 / ns)),
-            Throughput::Elements(elems) => self.elements_per_second((elems as f64) * (1e9 / ns)),
+            Throughput::Bytes(bytes) => self.bytes_per_second(bytes as f64, typical, values),
+            Throughput::Elements(elems) => self.elements_per_second(elems as f64, typical, values),
         }
     }
 
-    fn scale_for_graph(&self, ns: f64, values: &mut [f64]) -> &'static str {
+    fn scale_values(&self, ns: f64, values: &mut [f64]) -> &'static str {
         let (factor, unit) = if ns < 10f64.powi(0) {
             (10f64.powi(3), "ps")
         } else if ns < 10f64.powi(3) {
diff --git a/src/plot/distributions.rs b/src/plot/distributions.rs
index 4baad6c7a..4fec4d887 100644
--- a/src/plot/distributions.rs
+++ b/src/plot/distributions.rs
@@ -24,13 +24,13 @@ fn abs_distribution(
     let ci = estimate.confidence_interval;
     let typical = ci.upper_bound;
     let mut ci_values = [ci.lower_bound, ci.upper_bound, estimate.point_estimate];
-    let unit = formatter.scale_for_graph(typical, &mut ci_values);
+    let unit = formatter.scale_values(typical, &mut ci_values);
     let (lb, ub, p) = (ci_values[0], ci_values[1], ci_values[2]);
 
     let start = lb - (ub - lb) / 9.;
     let end = ub + (ub - lb) / 9.;
     let mut scaled_xs: Vec<f64> = distribution.iter().cloned().collect();
-    let _ = formatter.scale_for_graph(typical, &mut scaled_xs);
+    let _ = formatter.scale_values(typical, &mut scaled_xs);
     let scaled_xs_sample = Sample::new(&scaled_xs);
     let (kde_xs, ys) = kde::sweep(scaled_xs_sample, KDE_POINTS, Some((start, end)));
 
diff --git a/src/plot/pdf.rs b/src/plot/pdf.rs
index e9ca47a8b..1500c3bab 100644
--- a/src/plot/pdf.rs
+++ b/src/plot/pdf.rs
@@ -14,7 +14,7 @@ pub(crate) fn pdf(
     let avg_times = &measurements.avg_times;
     let typical = avg_times.max();
     let mut scaled_avg_times: Vec<f64> = (avg_times as &Sample<f64>).iter().cloned().collect();
-    let unit = formatter.scale_for_graph(typical, &mut scaled_avg_times);
+    let unit = formatter.scale_values(typical, &mut scaled_avg_times);
     let scaled_avg_times = Sample::new(&scaled_avg_times);
 
     let mean = scaled_avg_times.mean();
@@ -36,7 +36,7 @@ pub(crate) fn pdf(
     let (xs, ys) = kde::sweep(&scaled_avg_times, KDE_POINTS, None);
     let (lost, lomt, himt, hist) = avg_times.fences();
     let mut fences = [lost, lomt, himt, hist];
-    let _ = formatter.scale_for_graph(typical, &mut fences);
+    let _ = formatter.scale_values(typical, &mut fences);
     let [lost, lomt, himt, hist] = fences;
 
     let vertical = &[0., max_iters];
@@ -234,7 +234,7 @@ pub(crate) fn pdf_small(
     let avg_times = &*measurements.avg_times;
     let typical = avg_times.max();
     let mut scaled_avg_times: Vec<f64> = (avg_times as &Sample<f64>).iter().cloned().collect();
-    let unit = formatter.scale_for_graph(typical, &mut scaled_avg_times);
+    let unit = formatter.scale_values(typical, &mut scaled_avg_times);
     let scaled_avg_times = Sample::new(&scaled_avg_times);
     let mean = scaled_avg_times.mean();
 
@@ -294,14 +294,14 @@ fn pdf_comparison_figure(
     let base_avg_times = Sample::new(&comparison.base_avg_times);
     let typical = base_avg_times.max().max(measurements.avg_times.max());
     let mut scaled_base_avg_times: Vec<f64> = comparison.base_avg_times.clone();
-    let unit = formatter.scale_for_graph(typical, &mut scaled_base_avg_times);
+    let unit = formatter.scale_values(typical, &mut scaled_base_avg_times);
     let scaled_base_avg_times = Sample::new(&scaled_base_avg_times);
 
     let mut scaled_new_avg_times: Vec<f64> = (&measurements.avg_times as &Sample<f64>)
         .iter()
         .cloned()
         .collect();
-    let _ = formatter.scale_for_graph(typical, &mut scaled_new_avg_times);
+    let _ = formatter.scale_values(typical, &mut scaled_new_avg_times);
     let scaled_new_avg_times = Sample::new(&scaled_new_avg_times);
 
     let base_mean = scaled_base_avg_times.mean();
diff --git a/src/plot/regression.rs b/src/plot/regression.rs
index 29e2b3d77..b1d44f45c 100644
--- a/src/plot/regression.rs
+++ b/src/plot/regression.rs
@@ -25,12 +25,12 @@ fn regression_figure(
     let data = &measurements.data;
     let (max_iters, typical) = (data.x().max(), data.y().max());
     let mut scaled_y: Vec<f64> = data.y().iter().cloned().collect();
-    let unit = formatter.scale_for_graph(typical, &mut scaled_y);
+    let unit = formatter.scale_values(typical, &mut scaled_y);
     let scaled_y = Sample::new(&scaled_y);
 
     let point_estimate = Slope::fit(&measurements.data).0;
     let mut scaled_points = [point_estimate * max_iters, lb * max_iters, ub * max_iters];
-    let _ = formatter.scale_for_graph(typical, &mut scaled_points);
+    let _ = formatter.scale_values(typical, &mut scaled_points);
     let [point, lb, ub] = scaled_points;
 
     let exponent = (max_iters.log10() / 3.).floor() as i32 * 3;
@@ -179,7 +179,7 @@ fn regression_comparison_figure(
         point * max_iters,
         ub * max_iters,
     ];
-    let unit = formatter.scale_for_graph(typical, &mut points);
+    let unit = formatter.scale_values(typical, &mut points);
     let [base_lb, base_point, base_ub, lb, point, ub] = points;
 
     let mut figure = Figure::new();
diff --git a/src/plot/summary.rs b/src/plot/summary.rs
index 916b20ea5..9f085fd1e 100644
--- a/src/plot/summary.rs
+++ b/src/plot/summary.rs
@@ -71,7 +71,7 @@ pub fn line_comparison(
         .fold(::std::f64::NAN, f64::max);
 
     let mut dummy = [1.0];
-    let unit = formatter.scale_for_graph(max, &mut dummy);
+    let unit = formatter.scale_values(max, &mut dummy);
 
     f.configure(Axis::LeftY, |a| {
         a.configure(Grid::Major, |g| g.show())
@@ -96,7 +96,7 @@ pub fn line_comparison(
             .collect();
         tuples.sort_by(|&(ax, _), &(bx, _)| (ax.partial_cmp(&bx).unwrap_or(Ordering::Less)));
         let (xs, mut ys): (Vec<_>, Vec<_>) = tuples.into_iter().unzip();
-        formatter.scale_for_graph(max, &mut ys);
+        formatter.scale_values(max, &mut ys);
 
         let function_name = key.as_ref().map(|string| escape_underscores(string));
 
@@ -160,7 +160,7 @@ pub fn violin(
         }
     }
     let mut dummy = [1.0];
-    let unit = formatter.scale_for_graph(max, &mut dummy);
+    let unit = formatter.scale_values(max, &mut dummy);
 
     let tics = || (0..).map(|x| (f64::from(x)) + 0.5);
     let size = Size(1280, 200 + (25 * all_curves.len()));
@@ -191,8 +191,8 @@ pub fn violin(
         let mut y1: Vec<_> = y.iter().map(|&y| i + y * 0.5).collect();
         let mut y2: Vec<_> = y.iter().map(|&y| i - y * 0.5).collect();
 
-        formatter.scale_for_graph(max, &mut y1);
-        formatter.scale_for_graph(max, &mut y2);
+        formatter.scale_values(max, &mut y1);
+        formatter.scale_values(max, &mut y2);
 
         f.plot(FilledCurve { x: &**x, y1, y2 }, |c| {
             if is_first {