Skip to content

Commit

Permalink
Merge pull request #98 from jqnatividad/performance
Browse files Browse the repository at this point in the history
Performance
  • Loading branch information
dsietz committed Nov 17, 2021
2 parents db62782 + db1b780 commit 2462c16
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 28 deletions.
9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,8 @@ is-it-maintained-open-issues = { repository = "dsietz/test-data-generation" }
maintenance = {status = "passively-maintained"}

[dependencies]
config = "0.10"
lazy_static = "1.4"
once_cell = "1.8"
log = "0.4"
log4rs = "1.0"
serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"
Expand All @@ -44,6 +42,9 @@ levenshtein = "1.0"
version = "1.7.0"
features = ["serde-1"]

[dev-dependencies]
log4rs = "1.0"

[profile.release]
opt-level = 3
debug = false
Expand Down Expand Up @@ -74,4 +75,4 @@ debug = false
rpath = false
lto = true
debug-assertions = false
codegen-units = 1
codegen-units = 1
4 changes: 2 additions & 2 deletions src/data_sample_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,12 @@ impl DataSampleParser {
false => {
info!("Prior version 0.2.1 detected. Trying to upgrade to latest version");

return Self::updgrade_to_latest_version(serialized);
return Self::upgrade_to_latest_version(serialized);
}
}
}

fn updgrade_to_latest_version(serialized: String) -> DataSampleParser {
fn upgrade_to_latest_version(serialized: String) -> DataSampleParser {
let dsp: Value = serde_json::from_str(&serialized).unwrap();
let prfils = dsp.get("profiles").unwrap();
let mut pm: ProfilesMap = ProfilesMap::new();
Expand Down
44 changes: 30 additions & 14 deletions src/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ use std::thread;
use crate::Profile;
//use async_trait::async_trait;

// Compiles a regex literal lazily and hands back a reference to the cached,
// compiled `regex::Regex`.
//
// Each expansion declares its own `static RE` cell, so every distinct
// `regex!(...)` call site owns a separate cache. `get_or_init` runs the
// closure only while the cell is still empty; later evaluations of the same
// call site reuse the stored value instead of recompiling the pattern.
//
// An optional trailing comma after the literal is accepted.
// Panics on first use if the literal is not a valid pattern (`unwrap`).
macro_rules! regex {
($re:literal $(,)?) => {{
static RE: once_cell::sync::OnceCell<regex::Regex> = once_cell::sync::OnceCell::new();
RE.get_or_init(|| regex::Regex::new($re).unwrap())
}};
}

#[allow(dead_code)]
type PatternMap = BTreeMap<String, char>;

Expand Down Expand Up @@ -105,6 +112,7 @@ impl Fact {
/// let mut fact = Fact::new('r','c',0,0,2);
/// }
/// ```
#[inline]
pub fn new(k: char, pp: char, sw: u32, ew: u32, idx_off: u32) -> Fact {
Fact {
key: k,
Expand Down Expand Up @@ -139,6 +147,7 @@ impl Fact {
/// assert_eq!(fact.pattern_placeholder, 'c');
/// }
/// ```
#[inline]
pub fn from_serialized(serialized: &str) -> Fact {
    // `serialized` is already a `&str`, so it is handed to serde_json as-is;
    // re-borrowing it would only create a needless `&&str`.
    // Panics (via `unwrap`) when the text cannot be deserialized into a `Fact`.
    serde_json::from_str(serialized).unwrap()
}
Expand All @@ -160,6 +169,7 @@ impl Fact {
/// // {"key":"r","prior_key":null,"next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2}
/// }
///
#[inline]
pub fn serialize(&mut self) -> String {
    // Reborrow the receiver as a shared reference: serialization only reads
    // the fact, and this avoids passing a `&&mut Fact` double reference.
    // Panics (via `unwrap`) if serde_json reports a serialization error.
    serde_json::to_string(&*self).unwrap()
}
Expand All @@ -183,6 +193,7 @@ impl Fact {
/// fact.set_next_key('d');
/// }
///
#[inline]
pub fn set_next_key(&mut self, nk: char) {
    // Record `nk` as the key that follows this fact, replacing any earlier value.
    let successor = Some(nk);
    self.next_key = successor;
}
Expand All @@ -206,6 +217,7 @@ impl Fact {
/// fact.set_prior_key('o');
/// }
///
#[inline]
pub fn set_prior_key(&mut self, pk: char) {
    // Record `pk` as the key that precedes this fact, replacing any earlier value.
    let predecessor = Some(pk);
    self.prior_key = predecessor;
}
Expand All @@ -214,31 +226,31 @@ impl Fact {
/// Represents a symbolic pattern of an entity (String)
pub struct Pattern {
/// The regex rule used to find upper case consonants
regex_consonant_upper: Regex,
regex_consonant_upper: &'static Regex,
/// The regex rule used to find lower case consonants
regex_consonant_lower: Regex,
regex_consonant_lower: &'static Regex,
/// The regex rule used to find upper case vowels
regex_vowel_upper: Regex,
regex_vowel_upper: &'static Regex,
/// The regex rule used to find lower case vowels
regex_vowel_lower: Regex,
regex_vowel_lower: &'static Regex,
/// The regex rule used to find numeric digits
regex_numeric: Regex,
regex_numeric: &'static Regex,
/// The regex rule used to find punctuation
regex_punctuation: Regex,
regex_punctuation: &'static Regex,
/// The regex rule used to find white spaces
regex_space: Regex,
regex_space: &'static Regex,
}

impl Default for Pattern {
fn default() -> Self {
Pattern {
regex_consonant_upper: Regex::new(r"[B-DF-HJ-NP-TV-Z]").unwrap(),
regex_consonant_lower: Regex::new(r"[b-df-hj-np-tv-z]").unwrap(),
regex_vowel_upper: Regex::new(r"[A|E|I|O|U]").unwrap(),
regex_vowel_lower: Regex::new(r"[a|e|i|o|u]").unwrap(),
regex_numeric: Regex::new(r"[0-9]").unwrap(),
regex_punctuation: Regex::new(r"[.,\\/#!$%\\^&\\*;:{}=\\-_`~()\\?]").unwrap(),
regex_space: Regex::new(r"[\s]").unwrap(),
regex_consonant_upper: regex!(r"[B-DF-HJ-NP-TV-Z]"),
regex_consonant_lower: regex!(r"[b-df-hj-np-tv-z]"),
regex_vowel_upper: regex!(r"[A|E|I|O|U]"),
regex_vowel_lower: regex!(r"[a|e|i|o|u]"),
regex_numeric: regex!(r"[0-9]"),
regex_punctuation: regex!(r"[.,\\/#!$%\\^&\\*;:{}=\\-_`~()\\?]"),
regex_space: regex!(r"[\s]"),
}
}
}
Expand Down Expand Up @@ -304,6 +316,7 @@ impl PatternDefinition {
/// //}
/// }
/// ```
#[inline]
pub fn analyze(&mut self, entity: &str) -> (String, Vec<Fact>) {
// record the length of the passed value
//self.size = entity.len() as u32;
Expand Down Expand Up @@ -346,6 +359,7 @@ impl PatternDefinition {
/// // will return a Fact that represents the char `W`
/// }
/// ```
#[inline]
pub fn factualize(&mut self, entity: &str, idx: u32) -> Fact {
let c = entity.chars().nth(idx as usize).unwrap();
let pp = self.symbolize_char(c);
Expand Down Expand Up @@ -391,6 +405,7 @@ impl PatternDefinition {
/// println!("Upper case vowel symbol: {:?}", pttrn_def.get(&"VowelUpper".to_string()));
/// }
/// ```
#[inline]
pub fn get(&self, key: &str) -> char {
    // Look the symbol up by its name and copy it out of the map.
    let symbol = self.pattern_map.get(key).copied();
    // A key that is absent from the map panics here through `unwrap`.
    symbol.unwrap()
}
Expand All @@ -410,6 +425,7 @@ impl PatternDefinition {
/// // The pattern symbol for 'A' is V
/// }
/// ```
#[inline]
pub fn symbolize_char(&self, c: char) -> char {
// if you have to escape regex special characters: &*regex::escape(&*$c.to_string())
let mut symbol = self.pattern_map.get("Unknown");
Expand Down
16 changes: 8 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -669,20 +669,20 @@ impl Profile {
&& value.pattern_placeholder == *c
&& value.index_offset == idx as u32
{
facts.push(value.key.clone());
facts.push(value.key);

// if the value.key's prior char matches the prior generated char, then weight the value.key
// to increase the chance of it being used when generated
if value.prior_key.unwrap_or(' ') == prior_char {
facts.push(value.key.clone());
facts.push(value.key.clone());
facts.push(value.key);
facts.push(value.key);
}

// if the value.key's index_offset matches the current index, then weight the value.key
// to increase the chance of it being used when generated
if value.index_offset == idx as u32 {
facts.push(value.key.clone());
facts.push(value.key.clone());
facts.push(value.key);
facts.push(value.key);
}
}
}
Expand Down Expand Up @@ -808,8 +808,8 @@ impl Profile {
///
/// # Arguments
///
/// * `control: &String` - The string to compare against. This would be the real data from the data sample.</br>
/// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the percent difference.</br>
/// * `control: &str` - The string to compare against. This would be the real data from the data sample.</br>
/// * `experiment: &str` - The string to compare. This would be the generated data for which you want to find the percent difference.</br>
///
/// # Example
///
Expand All @@ -824,7 +824,7 @@ impl Profile {
/// assert_eq!(profile.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64);
/// }
///
pub fn realistic_test(&mut self, control: &String, experiment: &String) -> f64 {
pub fn realistic_test(&mut self, control: &str, experiment: &str) -> f64 {
realistic_test!(control, experiment)
}

Expand Down

0 comments on commit 2462c16

Please sign in to comment.