Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance #98

Merged
merged 7 commits into from
Nov 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,8 @@ is-it-maintained-open-issues = { repository = "dsietz/test-data-generation" }
maintenance = {status = "passively-maintained"}

[dependencies]
config = "0.10"
lazy_static = "1.4"
once_cell = "1.8"
log = "0.4"
log4rs = "1.0"
serde = "1.0"
serde_derive = "1.0"
serde_json = "1.0"
Expand All @@ -44,6 +42,9 @@ levenshtein = "1.0"
version = "1.7.0"
features = ["serde-1"]

[dev-dependencies]
log4rs = "1.0"

[profile.release]
opt-level = 3
debug = false
Expand Down Expand Up @@ -74,4 +75,4 @@ debug = false
rpath = false
lto = true
debug-assertions = false
codegen-units = 1
codegen-units = 1
4 changes: 2 additions & 2 deletions src/data_sample_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,12 @@ impl DataSampleParser {
false => {
info!("Prior version 0.2.1 detected. Trying to upgrade to latest version");

return Self::updgrade_to_latest_version(serialized);
return Self::upgrade_to_latest_version(serialized);
}
}
}

fn updgrade_to_latest_version(serialized: String) -> DataSampleParser {
fn upgrade_to_latest_version(serialized: String) -> DataSampleParser {
let dsp: Value = serde_json::from_str(&serialized).unwrap();
let prfils = dsp.get("profiles").unwrap();
let mut pm: ProfilesMap = ProfilesMap::new();
Expand Down
44 changes: 30 additions & 14 deletions src/engine/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ use std::thread;
use crate::Profile;
//use async_trait::async_trait;

/// Expands to a reference to a lazily compiled `regex::Regex`.
///
/// Each expansion declares its own `static` cell, so the pattern literal is
/// compiled the first time that expansion is evaluated and the same compiled
/// instance is handed back on every later evaluation. Panics (via `unwrap`)
/// if the literal is not a valid regular expression.
macro_rules! regex {
    ($pattern:literal $(,)?) => {{
        static COMPILED: once_cell::sync::OnceCell<regex::Regex> =
            once_cell::sync::OnceCell::new();
        COMPILED.get_or_init(|| {
            regex::Regex::new($pattern).unwrap()
        })
    }};
}

#[allow(dead_code)]
type PatternMap = BTreeMap<String, char>;

Expand Down Expand Up @@ -105,6 +112,7 @@ impl Fact {
/// let mut fact = Fact::new('r','c',0,0,2);
/// }
/// ```
#[inline]
pub fn new(k: char, pp: char, sw: u32, ew: u32, idx_off: u32) -> Fact {
Fact {
key: k,
Expand Down Expand Up @@ -139,6 +147,7 @@ impl Fact {
/// assert_eq!(fact.pattern_placeholder, 'c');
/// }
/// ```
#[inline]
pub fn from_serialized(serialized: &str) -> Fact {
    // `serialized` is already a `&str`, so it is handed to serde_json as-is
    // instead of being borrowed a second time (`&&str`).
    // Panics (via `unwrap`) when the text cannot be deserialized into a `Fact`.
    serde_json::from_str(serialized).unwrap()
}
Expand All @@ -160,6 +169,7 @@ impl Fact {
/// // {"key":"r","prior_key":null,"next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2}
/// }
///
#[inline]
pub fn serialize(&mut self) -> String {
    // Encode this Fact as a JSON string; a serialization failure panics
    // through `unwrap`.
    let encoded = serde_json::to_string(&self);
    encoded.unwrap()
}
Expand All @@ -183,6 +193,7 @@ impl Fact {
/// fact.set_next_key('d');
/// }
///
#[inline]
pub fn set_next_key(&mut self, nk: char) {
    // Record `nk` as the next key, overwriting whatever was stored before.
    let following = Some(nk);
    self.next_key = following;
}
Expand All @@ -206,6 +217,7 @@ impl Fact {
/// fact.set_prior_key('o');
/// }
///
#[inline]
pub fn set_prior_key(&mut self, pk: char) {
    // Record `pk` as the prior key, overwriting whatever was stored before.
    let preceding = Some(pk);
    self.prior_key = preceding;
}
Expand All @@ -214,31 +226,31 @@ impl Fact {
/// Represents a symbolic pattern of an entity (String)
pub struct Pattern {
/// The regex rule used to find upper case consonants
regex_consonant_upper: Regex,
regex_consonant_upper: &'static Regex,
/// The regex rule used to find lower case consonants
regex_consonant_lower: Regex,
regex_consonant_lower: &'static Regex,
/// The regex rule used to find upper case vowels
regex_vowel_upper: Regex,
regex_vowel_upper: &'static Regex,
/// The regex rule used to find lower case vowels
regex_vowel_lower: Regex,
regex_vowel_lower: &'static Regex,
/// The regex rule used to find numeric digits
regex_numeric: Regex,
regex_numeric: &'static Regex,
/// The regex rule used to find punctuation
regex_punctuation: Regex,
regex_punctuation: &'static Regex,
/// The regex rule used to find white spaces
regex_space: Regex,
regex_space: &'static Regex,
}

impl Default for Pattern {
fn default() -> Self {
Pattern {
regex_consonant_upper: Regex::new(r"[B-DF-HJ-NP-TV-Z]").unwrap(),
regex_consonant_lower: Regex::new(r"[b-df-hj-np-tv-z]").unwrap(),
regex_vowel_upper: Regex::new(r"[A|E|I|O|U]").unwrap(),
regex_vowel_lower: Regex::new(r"[a|e|i|o|u]").unwrap(),
regex_numeric: Regex::new(r"[0-9]").unwrap(),
regex_punctuation: Regex::new(r"[.,\\/#!$%\\^&\\*;:{}=\\-_`~()\\?]").unwrap(),
regex_space: Regex::new(r"[\s]").unwrap(),
regex_consonant_upper: regex!(r"[B-DF-HJ-NP-TV-Z]"),
regex_consonant_lower: regex!(r"[b-df-hj-np-tv-z]"),
regex_vowel_upper: regex!(r"[A|E|I|O|U]"),
regex_vowel_lower: regex!(r"[a|e|i|o|u]"),
regex_numeric: regex!(r"[0-9]"),
regex_punctuation: regex!(r"[.,\\/#!$%\\^&\\*;:{}=\\-_`~()\\?]"),
regex_space: regex!(r"[\s]"),
}
}
}
Expand Down Expand Up @@ -304,6 +316,7 @@ impl PatternDefinition {
/// //}
/// }
/// ```
#[inline]
pub fn analyze(&mut self, entity: &str) -> (String, Vec<Fact>) {
// record the length of the passed value
//self.size = entity.len() as u32;
Expand Down Expand Up @@ -346,6 +359,7 @@ impl PatternDefinition {
/// // will return a Fact that represents the char `W`
/// }
/// ```
#[inline]
pub fn factualize(&mut self, entity: &str, idx: u32) -> Fact {
let c = entity.chars().nth(idx as usize).unwrap();
let pp = self.symbolize_char(c);
Expand Down Expand Up @@ -391,6 +405,7 @@ impl PatternDefinition {
/// println!("Upper case vowel symbol: {:?}", pttrn_def.get(&"VowelUpper".to_string()));
/// }
/// ```
#[inline]
pub fn get(&self, key: &str) -> char {
    // Look the symbol up in `pattern_map`; a key with no entry panics
    // through `unwrap`.
    let symbol = self.pattern_map.get(key).unwrap();
    *symbol
}
Expand All @@ -410,6 +425,7 @@ impl PatternDefinition {
/// // The pattern symbol for 'A' is V
/// }
/// ```
#[inline]
pub fn symbolize_char(&self, c: char) -> char {
// if you have to escape regex special characters: &*regex::escape(&*$c.to_string())
let mut symbol = self.pattern_map.get("Unknown");
Expand Down
16 changes: 8 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -669,20 +669,20 @@ impl Profile {
&& value.pattern_placeholder == *c
&& value.index_offset == idx as u32
{
facts.push(value.key.clone());
facts.push(value.key);

// if the value.key's prior char matches the prior generated char, then weight the value.key
// to increase the chance of it being used when generated
if value.prior_key.unwrap_or(' ') == prior_char {
facts.push(value.key.clone());
facts.push(value.key.clone());
facts.push(value.key);
facts.push(value.key);
}

// if the value.key's index_offset matches the current index, then weight the value.key
// to increase the chance of it being used when generated
if value.index_offset == idx as u32 {
facts.push(value.key.clone());
facts.push(value.key.clone());
facts.push(value.key);
facts.push(value.key);
}
}
}
Expand Down Expand Up @@ -808,8 +808,8 @@ impl Profile {
///
/// # Arguments
///
/// * `control: &String` - The string to compare against. This would be the real data from the data sample.</br>
/// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the percent difference.</br>
/// * `control: &str` - The string to compare against. This would be the real data from the data sample.</br>
/// * `experiment: &str` - The string to compare. This would be the generated data for which you want to find the percent difference.</br>
///
/// # Example
///
Expand All @@ -824,7 +824,7 @@ impl Profile {
/// assert_eq!(profile.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64);
/// }
///
pub fn realistic_test(&mut self, control: &String, experiment: &String) -> f64 {
pub fn realistic_test(&mut self, control: &str, experiment: &str) -> f64 {
realistic_test!(control, experiment)
}

Expand Down