@@ -238,6 +238,11 @@ const NON_UTF8_ERR: &str = "<Non-UTF8 ERROR>";
238238const EMPTY_BYTE_VEC : Vec < u8 > = Vec :: new ( ) ;
239239static STATS_RECORDS : OnceLock < HashMap < String , StatsData > > = OnceLock :: new ( ) ;
240240static NULL_VAL : OnceLock < Vec < u8 > > = OnceLock :: new ( ) ;
241+ static UNIQUE_COLUMNS_VEC : OnceLock < Vec < usize > > = OnceLock :: new ( ) ;
242+ static COL_CARDINALITY_VEC : OnceLock < Vec < ( String , u64 ) > > = OnceLock :: new ( ) ;
243+ static FREQ_ROW_COUNT : OnceLock < u64 > = OnceLock :: new ( ) ;
244+ static EMPTY_VEC : Vec < ( String , u64 ) > = Vec :: new ( ) ;
245+ static ALL_UNIQUE_TEXT : OnceLock < Vec < u8 > > = OnceLock :: new ( ) ;
241246// FrequencyEntry, FrequencyField and FrequencyOutput are
242247// structs for JSON output
243248#[ derive( Serialize ) ]
@@ -287,11 +292,6 @@ struct ProcessedFrequency {
287292 rank : f64 ,
288293}
289294
290- static UNIQUE_COLUMNS_VEC : OnceLock < Vec < usize > > = OnceLock :: new ( ) ;
291- static COL_CARDINALITY_VEC : OnceLock < Vec < ( String , u64 ) > > = OnceLock :: new ( ) ;
292- static FREQ_ROW_COUNT : OnceLock < u64 > = OnceLock :: new ( ) ;
293- static EMPTY_VEC : OnceLock < Vec < ( String , u64 ) > > = OnceLock :: new ( ) ;
294-
295295pub fn run ( argv : & [ & str ] ) -> CliResult < ( ) > {
296296 let mut args: Args = util:: get_args ( USAGE , argv) ?;
297297 let mut rconfig = args. rconfig ( ) ;
@@ -317,12 +317,16 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
317317 util:: mem_file_check ( & path, false , args. flag_memcheck ) ?;
318318 }
319319
320- // Create NULL_VAL and EMPTY_VEC once to avoid repeated to_vec allocations
321- // safety: we're initializing the start of the program
320+ // Create NULL_VAL & ALL_UNIQUE_TEXT once at the start to avoid
321+ // repeated string & vec allocations in hot loops.
322+ // safety: we're initializing the OnceLocks at the start of the program
322323 NULL_VAL
323324 . set ( args. flag_null_text . as_bytes ( ) . to_vec ( ) )
324325 . unwrap ( ) ;
325- EMPTY_VEC . set ( Vec :: new ( ) ) . unwrap ( ) ;
326+
327+ ALL_UNIQUE_TEXT
328+ . set ( args. flag_all_unique_text . as_bytes ( ) . to_vec ( ) )
329+ . unwrap ( ) ;
326330
327331 let ( headers, tables) = if let Some ( idx) = args. rconfig ( ) . indexed ( ) ?
328332 && util:: njobs ( args. flag_jobs ) > 1
@@ -352,12 +356,14 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
352356 let mut processed_frequencies: Vec < ProcessedFrequency > = Vec :: with_capacity ( head_ftables. len ( ) ) ;
353357 #[ allow( unused_assignments) ]
354358 let mut value_str = String :: with_capacity ( 100 ) ;
359+ let vis_whitespace = args. flag_vis_whitespace ;
355360
356361 // safety: we know that UNIQUE_COLUMNS_VEC has been previously set
357- // when compiling frequencies by sel_headers fn
362+ // when compiling frequencies by sel_headers fn in either sequential or parallel mode
358363 let unique_headers_vec = UNIQUE_COLUMNS_VEC . get ( ) . unwrap ( ) ;
359364
360365 let mut wtr = Config :: new ( args. flag_output . as_ref ( ) ) . writer ( ) ?;
366+ // write headers
361367 wtr. write_record ( vec ! [ "field" , "value" , "count" , "percentage" , "rank" ] ) ?;
362368
363369 for ( i, ( header, ftab) ) in head_ftables. enumerate ( ) {
@@ -386,7 +392,7 @@ pub fn run(argv: &[&str]) -> CliResult<()> {
386392
387393 row = vec ! [
388394 & * header_vec,
389- if args . flag_vis_whitespace {
395+ if vis_whitespace {
390396 value_str =
391397 util:: visualize_whitespace( & String :: from_utf8_lossy( & processed_freq. value) ) ;
392398 value_str. as_bytes( )
@@ -428,7 +434,7 @@ impl Args {
428434 if all_unique_header {
429435 // For all-unique headers, create a single entry
430436 processed_frequencies. push ( ProcessedFrequency {
431- value : self . flag_all_unique_text . as_bytes ( ) . to_vec ( ) ,
437+ value : ALL_UNIQUE_TEXT . get ( ) . unwrap ( ) . clone ( ) ,
432438 count : row_count,
433439 percentage : 100.0 ,
434440 formatted_percentage : self . format_percentage ( 100.0 , abs_dec_places) ,
@@ -763,9 +769,7 @@ impl Args {
763769 // optimize the capacity of the freq_tables based on the cardinality of the columns
764770 // if sequential, use the cardinality from the stats cache
765771 // if parallel, use a default capacity of 1000 for non-unique columns
766- let col_cardinality_vec = COL_CARDINALITY_VEC
767- . get ( )
768- . unwrap_or ( EMPTY_VEC . get ( ) . unwrap ( ) ) ;
772+ let col_cardinality_vec = COL_CARDINALITY_VEC . get ( ) . unwrap_or ( & EMPTY_VEC ) ;
769773 let mut freq_tables: Vec < _ > = if col_cardinality_vec. is_empty ( ) {
770774 ( 0 ..nsel_len)
771775 . map ( |_| Frequencies :: with_capacity ( 1000 ) )
@@ -964,7 +968,7 @@ impl Args {
964968 let unique_headers_vec = UNIQUE_COLUMNS_VEC . get ( ) . unwrap ( ) ;
965969 let mut processed_frequencies = Vec :: with_capacity ( head_ftables. len ( ) ) ;
966970 let abs_dec_places = self . flag_pct_dec_places . unsigned_abs ( ) as u32 ;
967- let stats_records = STATS_RECORDS . get ( ) ;
971+ // pre-allocate space for 17 field stats, see list below for details
968972 let mut field_stats: Vec < FieldStats > = Vec :: with_capacity ( 17 ) ;
969973
970974 for ( i, ( header, ftab) ) in head_ftables. enumerate ( ) {
@@ -1002,7 +1006,9 @@ impl Args {
10021006 } ;
10031007
10041008 // Get stats record for this field
1005- let stats_record = stats_records. and_then ( |records| records. get ( & field_name) ) ;
1009+ let stats_record = STATS_RECORDS
1010+ . get ( )
1011+ . and_then ( |records| records. get ( & field_name) ) ;
10061012
10071013 // Get data type and nullcount from stats record
10081014 let dtype = stats_record. map_or ( String :: new ( ) , |sr| sr. r#type . clone ( ) ) ;
0 commit comments