@@ -2367,15 +2367,14 @@ impl Stats {
23672367 let typ = self . typ ;
23682368 // prealloc memory for performance
23692369 // we have MAX_STAT_COLUMNS columns at most with --everything
2370- let empty_string = String :: new;
23712370 let mut record = csv:: StringRecord :: with_capacity ( 512 , MAX_STAT_COLUMNS ) ;
23722371
2373- // min/max/range/sort_order/sortiness
2372+ // min/max/range/sort_order/sortiness (5 fields)
23742373 // we do this first as we want to get the sort_order, so we can skip sorting if not
23752374 // required. We also need to do this before --infer-boolean because we need to know
23762375 // the min/max values to determine if the range is equal to the supported boolean
23772376 // ranges as specified by --boolean-patterns.
2378- let mut minmax_range_sortorder_pieces = Vec :: with_capacity ( 5 ) ;
2377+ let minmax_range_sortorder_pieces: Vec < String > ;
23792378 let mut minval = String :: new ( ) ;
23802379 let mut maxval = String :: new ( ) ;
23812380 let mut column_sorted = false ;
@@ -2390,15 +2389,9 @@ impl Stats {
23902389 if mm. 3 . starts_with ( "Ascending" ) {
23912390 column_sorted = true ;
23922391 }
2393- minmax_range_sortorder_pieces. extend_from_slice ( & [ mm. 0 , mm. 1 , mm. 2 , mm. 3 , mm. 4 ] ) ;
2392+ minmax_range_sortorder_pieces = vec ! [ mm. 0 , mm. 1 , mm. 2 , mm. 3 , mm. 4 ] ;
23942393 } else {
2395- minmax_range_sortorder_pieces. extend_from_slice ( & [
2396- empty_string ( ) ,
2397- empty_string ( ) ,
2398- empty_string ( ) ,
2399- empty_string ( ) ,
2400- empty_string ( ) ,
2401- ] ) ;
2394+ minmax_range_sortorder_pieces = vec ! [ String :: new( ) ; 5 ] ;
24022395 }
24032396
24042397 let record_count = * RECORD_COUNT . get ( ) . unwrap_or ( & 1 ) ;
@@ -2413,7 +2406,7 @@ impl Stats {
24132406 }
24142407 } ) ;
24152408
2416- // modes/antimodes & cardinality/uniqueness_ratio
2409+ // cardinality & uniqueness_ratio (2 fields), modes/antimodes (3 fields each) - 8 total fields
24172410 // we do this second because we can use the sort order with cardinality, to skip sorting
24182411 // if it's not required. This not only makes cardinality computation faster, it also makes
24192412 // modes/antimodes computation faster.
@@ -2433,10 +2426,12 @@ impl Stats {
24332426 mc_pieces. reserve ( 8 ) ;
24342427 if self . which . cardinality {
24352428 cardinality = v. cardinality ( column_sorted, 1 ) ;
2436- #[ allow( clippy:: cast_precision_loss) ]
2437- let uniqueness_ratio = ( cardinality as f64 ) / ( record_count as f64 ) ;
24382429 mc_pieces. push ( itoa:: Buffer :: new ( ) . format ( cardinality) . to_owned ( ) ) ;
2439- mc_pieces. push ( util:: round_num ( uniqueness_ratio, round_places) ) ;
2430+ // uniqueness_ratio = cardinality / record_count
2431+ mc_pieces. push ( util:: round_num (
2432+ ( cardinality as f64 ) / ( record_count as f64 ) ,
2433+ round_places,
2434+ ) ) ;
24402435 }
24412436 if self . which . mode {
24422437 // mode/s & antimode/s
@@ -2445,7 +2440,7 @@ impl Stats {
24452440 mc_pieces. extend_from_slice (
24462441 // modes - short-circuit modes calculation as there is none
24472442 & [
2448- empty_string ( ) ,
2443+ String :: new ( ) ,
24492444 "0" . to_string ( ) ,
24502445 "0" . to_string ( ) ,
24512446 // antimodes - instead of returning everything, just say *ALL
@@ -2588,7 +2583,7 @@ impl Stats {
25882583 record. push_field ( field) ;
25892584 }
25902585
2591- // min/max/sum/avg/stddev/variance/cv length
2586+ // min/max/sum/avg/stddev/variance/cv length (7 fields)
25922587 // we only show string length stats for String type
25932588 if typ != FieldType :: TString {
25942589 for _ in 0 ..7 {
@@ -2627,7 +2622,7 @@ impl Stats {
26272622 }
26282623 }
26292624
2630- // mean, sem, geometric_mean, harmonic_mean, stddev, variance & cv
2625+ // mean, sem, geometric_mean, harmonic_mean, stddev, variance & cv (7 fields)
26312626 if typ == TString || typ == TNull {
26322627 for _ in 0 ..7 {
26332628 record. push_field ( EMPTY_STR ) ;
@@ -2703,11 +2698,12 @@ impl Stats {
27032698 // sparsity
27042699 #[ allow( clippy:: cast_precision_loss) ]
27052700 record. push_field ( & util:: round_num (
2706- self . nullcount as f64 / * RECORD_COUNT . get ( ) . unwrap_or ( & 1 ) as f64 ,
2701+ self . nullcount as f64 / record_count as f64 ,
27072702 round_places,
27082703 ) ) ;
27092704
2710- // quartiles
2705+ // quartiles: lower_outer_fence, lower_inner_fence, q1, q2_median, q3, iqr,
2706+ // upper_inner_fence, upper_outer_fence, skewness (9 fields)
27112707 // as q2==median, cache and reuse it if the --median or --mad flags are set
27122708 let mut existing_median = None ;
27132709 let mut quartile_pieces: Vec < String > = Vec :: new ( ) ;
@@ -2730,7 +2726,7 @@ impl Stats {
27302726 existing_median = Some ( q2) ;
27312727 let iqr = q3 - q1;
27322728
2733- // use fused multiply add (mul_add) when possible
2729+ // use fused multiply add (mul_add)
27342730 // fused mul_add is more accurate & is more performant if the
27352731 // target architecture has a dedicated `fma` CPU instruction
27362732 // https://doc.rust-lang.org/std/primitive.f64.html#method.mul_add
0 commit comments