Skip to content

Commit

Permalink
Merge pull request #102 from dsietz/development
Browse files Browse the repository at this point in the history
v0.3.4
  • Loading branch information
dsietz committed Dec 18, 2021
2 parents bc1cf98 + c062eec commit 94abc70
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 39 deletions.
6 changes: 3 additions & 3 deletions Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "test-data-generation"
version = "0.3.3"
version = "0.3.4"
edition = "2018"
authors = ["dsietz <davidsietz@yahoo.com>"]
repository = "https://github.com/dsietz/test-data-generation.git"
Expand Down Expand Up @@ -34,8 +34,8 @@ serde_json = "1.0"
serde_yaml = "0.8"
yaml-rust = "0.4"
regex = "1.3"
rand = "0.7"
crossbeam = "0.7"
rand = "0.8"
crossbeam = "0.8"
csv = "1.1"
levenshtein = "1.0"
[dependencies.indexmap]
Expand Down
16 changes: 2 additions & 14 deletions README.md
Expand Up @@ -43,20 +43,8 @@ or production environment (option #1 above)

Here's what's new ...

**0.3.0**
+ [Fix for issue #90](https://github.com/dsietz/test-data-generation/issues/90)
> Every effort has been made to automatically convert to the latest version of the DSP object when loading a saved dsp file from a prior version (e.g.: 0.2.1); however, successful conversion is not guaranteed.
+ [Added issue #91](https://github.com/dsietz/test-data-generation/issues/91)
> Optional parameters for setting the delimiter when analyzing and generating csv files.
**0.3.1**
+ [Fixed issue #93](https://github.com/dsietz/test-data-generation/issues/93)

**0.3.2**
+ [Fixed typos](https://github.com/dsietz/test-data-generation/pull/95)

**0.3.3**
+ [Improved performance](https://github.com/dsietz/test-data-generation/pull/98)
**0.3.4**
+ [Upgrade crates and improve performance](https://github.com/dsietz/test-data-generation/pull/100)

## About

Expand Down
5 changes: 3 additions & 2 deletions src/data_sample_parser.rs
Expand Up @@ -280,6 +280,7 @@ impl DataSampleParser {
return rtn;
}

#[inline]
fn analyze_columns(&mut self, profile_keys: Vec<String>, columns: Vec<Vec<String>>) {
let col_cnt = columns.len();
let (tx, rx): (
Expand Down Expand Up @@ -394,8 +395,8 @@ impl DataSampleParser {
for headers in rdr.headers() {
for header in headers.iter() {
//add a Profile to the list of profiles to represent the field (indexed using the header label)
let p = Profile::new_with_id(format!("{}", header));
self.profiles.insert(format!("{}", header), p);
let p = Profile::new_with_id(header.to_string());
self.profiles.insert(header.to_string(), p);
}
}

Expand Down
14 changes: 7 additions & 7 deletions src/engine/mod.rs
Expand Up @@ -244,13 +244,13 @@ pub struct Pattern {
impl Default for Pattern {
fn default() -> Self {
Pattern {
regex_consonant_upper: regex!(r"[B-DF-HJ-NP-TV-Z]"),
regex_consonant_lower: regex!(r"[b-df-hj-np-tv-z]"),
regex_vowel_upper: regex!(r"[A|E|I|O|U]"),
regex_vowel_lower: regex!(r"[a|e|i|o|u]"),
regex_numeric: regex!(r"[0-9]"),
regex_punctuation: regex!(r"[.,\\/#!$%\\^&\\*;:{}=\\-_`~()\\?]"),
regex_space: regex!(r"[\s]"),
regex_consonant_upper: regex!(r"(?-u)[B-DF-HJ-NP-TV-Z]"),
regex_consonant_lower: regex!(r"(?-u)[b-df-hj-np-tv-z]"),
regex_vowel_upper: regex!(r"(?-u)[A|E|I|O|U]"),
regex_vowel_lower: regex!(r"(?-u)[a|e|i|o|u]"),
regex_numeric: regex!(r"(?-u)[0-9]"),
regex_punctuation: regex!(r"(?-u)[.,\\/#!$%\\^&\\*;:{}=\\-_`~()\\?]"),
regex_space: regex!(r"(?-u)[\s]"),
}
}
}
Expand Down
27 changes: 16 additions & 11 deletions src/lib.rs
Expand Up @@ -291,7 +291,7 @@ impl Profile {
///
/// # Arguments
///
/// * `field: String` - The full path of the export file , excluding the file extension, (e.g.: "./test/data/custom-names").</br>
/// * `path: &str` - The full path of the export file, excluding the file extension (e.g.: "./test/data/custom-names").</br>
///
/// # Example
///
Expand Down Expand Up @@ -424,6 +424,7 @@ impl Profile {
/// assert_eq!(profile.apply_facts(results.0, results.1).unwrap(), 1);
/// }
/// ```
#[inline]
pub fn apply_facts(&mut self, pattern: String, facts: Vec<Fact>) -> Result<i32, String> {
// balance the storing of facts across all the vectors that can be processed in parallel
let mut i = 0;
Expand All @@ -433,7 +434,7 @@ impl Profile {
}

self.facts[i as usize].push(f);
i = i + 1;
i += 1;
}

// store the pattern
Expand Down Expand Up @@ -478,10 +479,11 @@ impl Profile {
/// assert_eq!(profile.pattern_ranks, test);
/// }
/// ```
#[inline]
pub fn cum_patternmap(&mut self) {
// Reference: https://users.rust-lang.org/t/cannot-infer-an-appropriate-lifetime-for-autoref/13360/3

debug!("calucating the cumulative percentage of occurences for data point patterns...");
debug!("calculating the cumulative percentage of occurences for data point patterns...");

// calculate the percentage by patterns
// -> {"CcvccpSCvcc": 14.285714285714285, "CvccvccpSCvccvc": 14.285714285714285, "CvccvccpSCvccvv": 28.57142857142857, "CvcvcccpSCcvcv": 14.285714285714285, "CvcvpSCvccc": 14.285714285714285, "V~CcvvcpSCvccc": 14.285714285714285}
Expand Down Expand Up @@ -540,8 +542,9 @@ impl Profile {
/// // The size ranks are [(3, 50), (4, 83.33333333333333), (5, 100)]
/// }
/// ```
#[inline]
pub fn cum_sizemap(&mut self) {
debug!("calucating the cumulative percentage of occurences for data point sizes...");
debug!("calculating the cumulative percentage of occurences for data point sizes...");
// calculate the percentage by sizes
// -> {11: 28.57142857142857, 14: 14.285714285714285, 15: 57.14285714285714}
let mut size_ranks = SizeRankMap::new();
Expand All @@ -556,13 +559,13 @@ impl Profile {
// sort the ranks by percentages in decreasing order
// -> [(15, 57.14285714285714), (11, 28.57142857142857), (14, 14.285714285714285)]
let mut sizes = size_ranks.iter().collect::<Vec<_>>();
sizes.sort_by(|&(_, a), &(_, b)| b.partial_cmp(&a).unwrap());
sizes.sort_by(|&(_, a), &(_, b)| b.partial_cmp(a).unwrap());

// calculate the cumulative sum of the size rankings
// -> [(15, 57.14285714285714), (11, 85.71428571428571), (14, 100)]
self.size_ranks = sizes
.iter()
.scan((0 as u32, 0.00 as f64), |state, &(&k, &v)| {
.scan((0_u32, 0.00_f64), |state, &(&k, &v)| {
*state = (k, state.1 + &v);
Some(*state)
})
Expand Down Expand Up @@ -592,6 +595,7 @@ impl Profile {
/// print!("The test data {:?} was generated.", profile.generate());
/// }
/// ```
#[inline]
pub fn generate(&mut self) -> String {
// 1. get a random number
let s: f64 = random_percentage!();
Expand All @@ -611,9 +615,7 @@ impl Profile {
.clone();

// lastly, generate the test data using facts that adhere to the pattern
let generated = self.generate_from_pattern(pattern.0);

generated
self.generate_from_pattern(pattern.0)
}

/// This function generates realistic test data based on the sample data that was analyzed.
Expand Down Expand Up @@ -643,6 +645,7 @@ impl Profile {
/// assert_eq!(generated.len(), 10);
/// }
/// ```
#[inline]
pub fn generate_from_pattern(&self, pattern: String) -> String {
let pattern_chars = pattern.chars().collect::<Vec<char>>();
let mut generated = String::new();
Expand Down Expand Up @@ -707,7 +710,7 @@ impl Profile {

if rnd_start >= rnd_end {
//generated.push(fact_options[0 as usize]);
fact_options[0 as usize]
fact_options[0_usize]
} else {
let x: u32 = random_between!(rnd_start, rnd_end);
//prev_char = fact_options[x as usize];
Expand Down Expand Up @@ -770,7 +773,7 @@ impl Profile {
percent_similarity.iter().sum::<f64>() as f64 / percent_similarity.len() as f64;
debug!("Percent similarity is {} ...", &percent);

if percent >= 80 as f64 {
if percent >= 80_f64 {
self.analyze(&experiment);
}
}
Expand Down Expand Up @@ -824,6 +827,7 @@ impl Profile {
/// assert_eq!(profile.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64);
/// }
///
#[inline]
// Thin wrapper: forwards `control` and `experiment` unchanged to the
// `realistic_test!` macro (defined outside this view) and returns the
// macro's `f64` result.
// NOTE(review): `self` is not referenced in this body; presumably the
// `&mut self` receiver is kept for API compatibility — confirm before changing it.
pub fn realistic_test(&mut self, control: &str, experiment: &str) -> f64 {
realistic_test!(control, experiment)
}
Expand All @@ -838,6 +842,7 @@ impl Profile {
/// The recommended number of processors is 1 per 10K data points (e.g.: profiling 20K names should be handled by 2 processors)</br>
/// NOTE: The default number of processors is 4.
///
#[inline]
fn new_facts(p: u8) -> Vec<Vec<Fact>> {
let mut vec_main = Vec::new();

Expand Down
5 changes: 3 additions & 2 deletions src/macros.rs
Expand Up @@ -41,8 +41,9 @@ macro_rules! random_percentage {
use rand::{thread_rng, Rng};

let mut rng = thread_rng();
let nbr: f64 = rng.gen_range(0_f64..100_f64);

rng.gen_range::<f64, f64, f64>(0 as f64, 100 as f64)
nbr
}};
}

Expand All @@ -69,7 +70,7 @@ macro_rules! random_between {
use rand::{thread_rng, Rng};

let mut rng = thread_rng();
let nbr = rng.gen_range::<u32, u32, u32>($a as u32, $b as u32);
let nbr: u32 = rng.gen_range($a as u32..$b as u32);

nbr
}};
Expand Down

0 comments on commit 94abc70

Please sign in to comment.