Merge pull request #102 from dsietz/development

v0.3.4
dsietz · Dec 18, 2021 · 94abc70
2 parents bc1cf98 + c062eec
commit 94abc70
Show file tree

Hide file tree

Showing 6 changed files with 34 additions and 39 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "test-data-generation"
-version = "0.3.3"
+version = "0.3.4"
 edition = "2018"
 authors = ["dsietz <davidsietz@yahoo.com>"]
 repository = "https://github.com/dsietz/test-data-generation.git"
@@ -34,8 +34,8 @@ serde_json   = "1.0"
 serde_yaml   = "0.8"
 yaml-rust    = "0.4"
 regex        = "1.3"
-rand         = "0.7"
-crossbeam    = "0.7"
+rand         = "0.8"
+crossbeam    = "0.8"
 csv          = "1.1"
 levenshtein  = "1.0"
 [dependencies.indexmap]

diff --git a/README.md b/README.md
@@ -43,20 +43,8 @@ or production environment (option #1 above)
 
 Here's what's new ...
 
-**0.3.0**
-+ [Fix for issue #90](https://github.com/dsietz/test-data-generation/issues/90)
-  > Every effort has been made to automatically convert to the latest version of the DSP object when loading from a saved dsp file from a prior version, (e.g.: 0.2.1), however, it is not guaranteed.
-+ [Added issue #91](https://github.com/dsietz/test-data-generation/issues/91)
-  > Optional parameters for setting the delimiter when analyzing and generating csv files.
-
-**0.3.1**  
-+ [Fixed issue #93](https://github.com/dsietz/test-data-generation/issues/93)  
-
-**0.3.2** 
-+ [Fixed typos](https://github.com/dsietz/test-data-generation/pull/95)
-
-**0.3.3**
-+ [Improved performance](https://github.com/dsietz/test-data-generation/pull/98)
+**0.3.4**
++ [Upgrade crates and improve performance](https://github.com/dsietz/test-data-generation/pull/100)
 
 ## About
 

diff --git a/src/data_sample_parser.rs b/src/data_sample_parser.rs
@@ -280,6 +280,7 @@ impl DataSampleParser {
         return rtn;
     }
 
+    #[inline]
     fn analyze_columns(&mut self, profile_keys: Vec<String>, columns: Vec<Vec<String>>) {
         let col_cnt = columns.len();
         let (tx, rx): (
@@ -394,8 +395,8 @@ impl DataSampleParser {
         for headers in rdr.headers() {
             for header in headers.iter() {
                 //add a Profile to the list of profiles to represent the field (indexed using the header label)
-                let p = Profile::new_with_id(format!("{}", header));
-                self.profiles.insert(format!("{}", header), p);
+                let p = Profile::new_with_id(header.to_string());
+                self.profiles.insert(header.to_string(), p);
             }
         }
 

diff --git a/src/engine/mod.rs b/src/engine/mod.rs
@@ -244,13 +244,13 @@ pub struct Pattern {
 impl Default for Pattern {
     fn default() -> Self {
         Pattern {
-            regex_consonant_upper: regex!(r"[B-DF-HJ-NP-TV-Z]"),
-            regex_consonant_lower: regex!(r"[b-df-hj-np-tv-z]"),
-            regex_vowel_upper: regex!(r"[A|E|I|O|U]"),
-            regex_vowel_lower: regex!(r"[a|e|i|o|u]"),
-            regex_numeric: regex!(r"[0-9]"),
-            regex_punctuation: regex!(r"[.,\\/#!$%\\^&\\*;:{}=\\-_`~()\\?]"),
-            regex_space: regex!(r"[\s]"),
+            regex_consonant_upper: regex!(r"(?-u)[B-DF-HJ-NP-TV-Z]"),
+            regex_consonant_lower: regex!(r"(?-u)[b-df-hj-np-tv-z]"),
+            regex_vowel_upper: regex!(r"(?-u)[A|E|I|O|U]"),
+            regex_vowel_lower: regex!(r"(?-u)[a|e|i|o|u]"),
+            regex_numeric: regex!(r"(?-u)[0-9]"),
+            regex_punctuation: regex!(r"(?-u)[.,\\/#!$%\\^&\\*;:{}=\\-_`~()\\?]"),
+            regex_space: regex!(r"(?-u)[\s]"),
         }
     }
 }

diff --git a/src/lib.rs b/src/lib.rs
@@ -291,7 +291,7 @@ impl Profile {
     ///
     /// # Arguments
     ///
-    /// * `field: String` - The full path of the export file , excluding the file extension, (e.g.: "./test/data/custom-names").</br>
+    /// * `path: &str` - The full path of the export file, excluding the file extension (e.g.: "./test/data/custom-names").</br>
     ///
     /// #Example
     ///
@@ -424,6 +424,7 @@ impl Profile {
     ///		assert_eq!(profile.apply_facts(results.0, results.1).unwrap(), 1);
     /// }
     /// ```
+    #[inline]
     pub fn apply_facts(&mut self, pattern: String, facts: Vec<Fact>) -> Result<i32, String> {
         // balance the storing of facts across all the vectors that can be processed in parallel
         let mut i = 0;
@@ -433,7 +434,7 @@ impl Profile {
             }
 
             self.facts[i as usize].push(f);
-            i = i + 1;
+            i += 1;
         }
 
         // store the pattern
@@ -478,10 +479,11 @@ impl Profile {
     ///    	assert_eq!(profile.pattern_ranks, test);
     /// }
     /// ```
+    #[inline]
     pub fn cum_patternmap(&mut self) {
         // Reference: https://users.rust-lang.org/t/cannot-infer-an-appropriate-lifetime-for-autoref/13360/3
 
-        debug!("calucating the cumulative percentage of occurences for data point patterns...");
+        debug!("calculating the cumulative percentage of occurences for data point patterns...");
 
         // calculate the percentage by patterns
         // -> {"CcvccpSCvcc": 14.285714285714285, "CvccvccpSCvccvc": 14.285714285714285, "CvccvccpSCvccvv": 28.57142857142857, "CvcvcccpSCcvcv": 14.285714285714285, "CvcvpSCvccc": 14.285714285714285, "V~CcvvcpSCvccc": 14.285714285714285}
@@ -540,8 +542,9 @@ impl Profile {
     ///     // The size ranks are [(3, 50), (4, 83.33333333333333), (5, 100)]
     /// }
     /// ```
+    #[inline]
     pub fn cum_sizemap(&mut self) {
-        debug!("calucating the cumulative percentage of occurences for data point sizes...");
+        debug!("calculating the cumulative percentage of occurences for data point sizes...");
         // calculate the percentage by sizes
         // -> {11: 28.57142857142857, 14: 14.285714285714285, 15: 57.14285714285714}
         let mut size_ranks = SizeRankMap::new();
@@ -556,13 +559,13 @@ impl Profile {
         // sort the ranks by percentages in decreasing order
         // -> [(15, 57.14285714285714), (11, 28.57142857142857), (14, 14.285714285714285)]
         let mut sizes = size_ranks.iter().collect::<Vec<_>>();
-        sizes.sort_by(|&(_, a), &(_, b)| b.partial_cmp(&a).unwrap());
+        sizes.sort_by(|&(_, a), &(_, b)| b.partial_cmp(a).unwrap());
 
         // calculate the cumulative sum of the size rankings
         // -> [(15, 57.14285714285714), (11, 85.71428571428571), (14, 100)]
         self.size_ranks = sizes
             .iter()
-            .scan((0 as u32, 0.00 as f64), |state, &(&k, &v)| {
+            .scan((0_u32, 0.00_f64), |state, &(&k, &v)| {
                 *state = (k, state.1 + &v);
                 Some(*state)
             })
@@ -592,6 +595,7 @@ impl Profile {
     ///		print!("The test data {:?} was generated.", profile.generate());
     /// }
     /// ```
+    #[inline]
     pub fn generate(&mut self) -> String {
         // 1. get a random number
         let s: f64 = random_percentage!();
@@ -611,9 +615,7 @@ impl Profile {
             .clone();
 
         // lastly, generate the test data using facts that adhere to the pattern
-        let generated = self.generate_from_pattern(pattern.0);
-
-        generated
+        self.generate_from_pattern(pattern.0)
     }
 
     /// This function generates realistic test data based on the sample data that was analyzed.
@@ -643,6 +645,7 @@ impl Profile {
     ///     assert_eq!(generated.len(), 10);
     /// }
     /// ```
+    #[inline]
     pub fn generate_from_pattern(&self, pattern: String) -> String {
         let pattern_chars = pattern.chars().collect::<Vec<char>>();
         let mut generated = String::new();
@@ -707,7 +710,7 @@ impl Profile {
 
                 if rnd_start >= rnd_end {
                     //generated.push(fact_options[0 as usize]);
-                    fact_options[0 as usize]
+                    fact_options[0_usize]
                 } else {
                     let x: u32 = random_between!(rnd_start, rnd_end);
                     //prev_char = fact_options[x as usize];
@@ -770,7 +773,7 @@ impl Profile {
                 percent_similarity.iter().sum::<f64>() as f64 / percent_similarity.len() as f64;
             debug!("Percent similarity is {} ...", &percent);
 
-            if percent >= 80 as f64 {
+            if percent >= 80_f64 {
                 self.analyze(&experiment);
             }
         }
@@ -824,6 +827,7 @@ impl Profile {
     ///     assert_eq!(profile.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64);
     /// }
     ///
+    #[inline]
     pub fn realistic_test(&mut self, control: &str, experiment: &str) -> f64 {
         realistic_test!(control, experiment)
     }
@@ -838,6 +842,7 @@ impl Profile {
     ///         The recommended number of processors is 1 per 10K data points (e.g.: profiling 20K names should be handled by 2 processors)</br>
     ///         NOTE: The default number of processors is 4.
     ///
+    #[inline]
     fn new_facts(p: u8) -> Vec<Vec<Fact>> {
         let mut vec_main = Vec::new();
 

diff --git a/src/macros.rs b/src/macros.rs
@@ -41,8 +41,9 @@ macro_rules! random_percentage {
         use rand::{thread_rng, Rng};
 
         let mut rng = thread_rng();
+        let nbr: f64 = rng.gen_range(0_f64..100_f64);
 
-        rng.gen_range::<f64, f64, f64>(0 as f64, 100 as f64)
+        nbr
     }};
 }
 
@@ -69,7 +70,7 @@ macro_rules! random_between {
         use rand::{thread_rng, Rng};
 
         let mut rng = thread_rng();
-        let nbr = rng.gen_range::<u32, u32, u32>($a as u32, $b as u32);
+        let nbr: u32 = rng.gen_range($a as u32..$b as u32);
 
         nbr
     }};