Skip to content

Commit 2f41c33

Browse files
committed
perf: stats microoptimize
- remove unnecessary string alloc when initializing stats_separator
- use faster/more idiomatic vec! macro instead of extend_from_slice
- use faster itoa instead of to_string for integer string conversions
1 parent 7084f50 commit 2f41c33

File tree

1 file changed

+26
-45
lines changed

1 file changed

+26
-45
lines changed

src/cmd/stats.rs

Lines changed: 26 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -2404,49 +2404,39 @@ impl Stats {
24042404
let record_count = *RECORD_COUNT.get().unwrap_or(&1);
24052405

24062406
// get the stats separator
2407-
let stats_separator = STATS_SEPARATOR
2408-
.get_or_init(|| {
2409-
if self.which.mode || self.which.percentiles {
2410-
std::env::var("QSV_STATS_SEPARATOR")
2411-
.unwrap_or_else(|_| DEFAULT_STATS_SEPARATOR.to_string())
2412-
} else {
2413-
DEFAULT_STATS_SEPARATOR.to_string()
2414-
}
2415-
})
2416-
.to_string();
2407+
let stats_separator = STATS_SEPARATOR.get_or_init(|| {
2408+
if self.which.mode || self.which.percentiles {
2409+
std::env::var("QSV_STATS_SEPARATOR")
2410+
.unwrap_or_else(|_| DEFAULT_STATS_SEPARATOR.to_string())
2411+
} else {
2412+
DEFAULT_STATS_SEPARATOR.to_string()
2413+
}
2414+
});
24172415

24182416
// modes/antimodes & cardinality/uniqueness_ratio
24192417
// we do this second because we can use the sort order with cardinality, to skip sorting
24202418
// if its not required. This makes not only cardinality computation faster, it also makes
24212419
// modes/antimodes computation faster.
24222420
// We also need to know the cardinality to --infer-boolean should that be enabled
24232421
let mut cardinality = 0;
2424-
let mut mc_pieces = Vec::with_capacity(8);
2422+
let mut mc_pieces: Vec<String> = Vec::new();
24252423
match self.modes.as_mut() {
24262424
None => {
24272425
if self.which.cardinality {
2428-
mc_pieces.extend_from_slice(&[empty_string(), empty_string()]);
2426+
mc_pieces = vec![String::new(); 2];
24292427
}
24302428
if self.which.mode {
2431-
mc_pieces.extend_from_slice(&[
2432-
empty_string(),
2433-
empty_string(),
2434-
empty_string(),
2435-
empty_string(),
2436-
empty_string(),
2437-
empty_string(),
2438-
]);
2429+
mc_pieces = vec![String::new(); 6];
24392430
}
24402431
},
24412432
Some(ref mut v) => {
2433+
mc_pieces.reserve(8);
24422434
if self.which.cardinality {
24432435
cardinality = v.cardinality(column_sorted, 1);
24442436
#[allow(clippy::cast_precision_loss)]
24452437
let uniqueness_ratio = (cardinality as f64) / (record_count as f64);
2446-
mc_pieces.extend_from_slice(&[
2447-
itoa::Buffer::new().format(cardinality).to_owned(),
2448-
util::round_num(uniqueness_ratio, round_places),
2449-
]);
2438+
mc_pieces.push(itoa::Buffer::new().format(cardinality).to_owned());
2439+
mc_pieces.push(util::round_num(uniqueness_ratio, round_places));
24502440
}
24512441
if self.which.mode {
24522442
// mode/s & antimode/s
@@ -2474,12 +2464,12 @@ impl Stats {
24742464
modes_result
24752465
.iter()
24762466
.map(|c| util::visualize_whitespace(&String::from_utf8_lossy(c)))
2477-
.join(&stats_separator)
2467+
.join(stats_separator)
24782468
} else {
24792469
modes_result
24802470
.iter()
24812471
.map(|c| String::from_utf8_lossy(c))
2482-
.join(&stats_separator)
2472+
.join(stats_separator)
24832473
};
24842474

24852475
// antimode/s ============
@@ -2505,11 +2495,11 @@ impl Stats {
25052495
let antimodes_vals = &antimodes_result
25062496
.iter()
25072497
.map(|c| String::from_utf8_lossy(c))
2508-
.join(&stats_separator);
2498+
.join(stats_separator);
25092499

25102500
// if the antimodes result starts with the separator,
25112501
// it indicates that NULL is the first antimode. Add NULL to the list.
2512-
if antimodes_vals.starts_with(&stats_separator) {
2502+
if antimodes_vals.starts_with(stats_separator) {
25132503
antimodes_list.push_str("NULL");
25142504
}
25152505
antimodes_list.push_str(antimodes_vals);
@@ -2523,16 +2513,16 @@ impl Stats {
25232513
mc_pieces.extend_from_slice(&[
25242514
// mode/s
25252515
modes_list,
2526-
modes_count.to_string(),
2527-
mode_occurrences.to_string(),
2516+
itoa::Buffer::new().format(modes_count).to_owned(),
2517+
itoa::Buffer::new().format(mode_occurrences).to_owned(),
25282518
// antimode/s
25292519
if visualize_ws {
25302520
util::visualize_whitespace(&antimodes_list)
25312521
} else {
25322522
antimodes_list
25332523
},
2534-
antimodes_count.to_string(),
2535-
antimode_occurrences.to_string(),
2524+
itoa::Buffer::new().format(antimodes_count).to_owned(),
2525+
itoa::Buffer::new().format(antimode_occurrences).to_owned(),
25362526
]);
25372527
}
25382528
}
@@ -2720,7 +2710,7 @@ impl Stats {
27202710
// quartiles
27212711
// as q2==median, cache and reuse it if the --median or --mad flags are set
27222712
let mut existing_median = None;
2723-
let mut quartile_pieces = Vec::with_capacity(9);
2713+
let mut quartile_pieces: Vec<String> = Vec::new();
27242714
match self.unsorted_stats.as_mut().and_then(|v| match typ {
27252715
TInteger | TFloat | TDate | TDateTime => {
27262716
if self.which.quartiles {
@@ -2733,17 +2723,7 @@ impl Stats {
27332723
}) {
27342724
None => {
27352725
if self.which.quartiles {
2736-
quartile_pieces.extend_from_slice(&[
2737-
empty_string(),
2738-
empty_string(),
2739-
empty_string(),
2740-
empty_string(),
2741-
empty_string(),
2742-
empty_string(),
2743-
empty_string(),
2744-
empty_string(),
2745-
empty_string(),
2746-
]);
2726+
quartile_pieces = vec![String::new(); 9];
27472727
}
27482728
},
27492729
Some((q1, q2, q3)) => {
@@ -2773,6 +2753,7 @@ impl Stats {
27732753
// which in turn, is the basis of the fused multiply add version below
27742754
let skewness = (2.0f64.mul_add(-q2, q3) + q1) / iqr;
27752755

2756+
quartile_pieces.reserve(9);
27762757
if typ == TDateTime || typ == TDate {
27772758
// casting from f64 to i64 is OK, per
27782759
// https://doc.rust-lang.org/reference/expressions/operator-expr.html#numeric-cast
@@ -2901,7 +2882,7 @@ impl Stats {
29012882
.map(|p| util::round_num(*p, round_places))
29022883
.collect::<Vec<_>>()
29032884
};
2904-
record.push_field(&formatted_values.join(&stats_separator));
2885+
record.push_field(&formatted_values.join(stats_separator));
29052886
} else {
29062887
record.push_field(EMPTY_STR);
29072888
}

0 commit comments

Comments
 (0)