Skip to content

Commit 775bb88

Browse files
committed
perf: frequency microoptimize
- create OnceLock for EMPTY_VEC to remove repeated inits in hot path - use more idiomatic let else - remove unnecessary temp vars
1 parent 012ac15 commit 775bb88

File tree

1 file changed

+18
-15
lines changed

1 file changed

+18
-15
lines changed

src/cmd/frequency.rs

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ struct ProcessedFrequency {
290290
static UNIQUE_COLUMNS_VEC: OnceLock<Vec<usize>> = OnceLock::new();
291291
static COL_CARDINALITY_VEC: OnceLock<Vec<(String, u64)>> = OnceLock::new();
292292
static FREQ_ROW_COUNT: OnceLock<u64> = OnceLock::new();
293+
static EMPTY_VEC: OnceLock<Vec<(String, u64)>> = OnceLock::new();
293294

294295
pub fn run(argv: &[&str]) -> CliResult<()> {
295296
let mut args: Args = util::get_args(USAGE, argv)?;
@@ -316,14 +317,19 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
316317
util::mem_file_check(&path, false, args.flag_memcheck)?;
317318
}
318319

319-
// Create NULL value once to avoid repeated to_vec allocations
320+
// Create NULL_VAL and EMPTY_VEC once to avoid repeated to_vec allocations
321+
// safety: we're initializing at the start of the program
320322
NULL_VAL
321323
.set(args.flag_null_text.as_bytes().to_vec())
322-
.map_err(|_| "Cannot set NULL_VAL")?;
324+
.unwrap();
325+
EMPTY_VEC.set(Vec::new()).unwrap();
323326

324-
let (headers, tables) = match args.rconfig().indexed()? {
325-
Some(ref mut idx) if util::njobs(args.flag_jobs) > 1 => args.parallel_ftables(idx),
326-
_ => args.sequential_ftables(),
327+
let (headers, tables) = if let Some(idx) = args.rconfig().indexed()?
328+
&& util::njobs(args.flag_jobs) > 1
329+
{
330+
args.parallel_ftables(&idx)
331+
} else {
332+
args.sequential_ftables()
327333
}?;
328334

329335
if is_json {
@@ -361,9 +367,8 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
361367
header.to_vec()
362368
};
363369

364-
let all_unique_header = unique_headers_vec.contains(&i);
365370
args.process_frequencies(
366-
all_unique_header,
371+
unique_headers_vec.contains(&i),
367372
abs_dec_places,
368373
row_count,
369374
&ftab,
@@ -422,13 +427,11 @@ impl Args {
422427
) {
423428
if all_unique_header {
424429
// For all-unique headers, create a single entry
425-
let all_unique_text = self.flag_all_unique_text.as_bytes().to_vec();
426-
let formatted_pct = self.format_percentage(100.0, abs_dec_places);
427430
processed_frequencies.push(ProcessedFrequency {
428-
value: all_unique_text,
431+
value: self.flag_all_unique_text.as_bytes().to_vec(),
429432
count: row_count,
430433
percentage: 100.0,
431-
formatted_percentage: formatted_pct,
434+
formatted_percentage: self.format_percentage(100.0, abs_dec_places),
432435
rank: 1.0, // Rank 1 for all-unique headers
433436
});
434437
} else {
@@ -444,12 +447,11 @@ impl Args {
444447

445448
// Convert to processed frequencies
446449
for (value, count, percentage, rank) in counts_to_process {
447-
let formatted_pct = self.format_percentage(percentage, abs_dec_places);
448450
processed_frequencies.push(ProcessedFrequency {
449451
value,
450452
count,
451453
percentage,
452-
formatted_percentage: formatted_pct,
454+
formatted_percentage: self.format_percentage(percentage, abs_dec_places),
453455
rank,
454456
});
455457
}
@@ -761,8 +763,9 @@ impl Args {
761763
// optimize the capacity of the freq_tables based on the cardinality of the columns
762764
// if sequential, use the cardinality from the stats cache
763765
// if parallel, use a default capacity of 1000 for non-unique columns
764-
let empty_vec = Vec::new();
765-
let col_cardinality_vec = COL_CARDINALITY_VEC.get().unwrap_or(&empty_vec);
766+
let col_cardinality_vec = COL_CARDINALITY_VEC
767+
.get()
768+
.unwrap_or(EMPTY_VEC.get().unwrap());
766769
let mut freq_tables: Vec<_> = if col_cardinality_vec.is_empty() {
767770
(0..nsel_len)
768771
.map(|_| Frequencies::with_capacity(1000))

0 commit comments

Comments
 (0)