From f260fbc62f008fbcaa9afd8befe94647129f8ed1 Mon Sep 17 00:00:00 2001 From: dsietz Date: Sat, 13 Nov 2021 18:27:11 -0500 Subject: [PATCH 01/13] Issue #90 --- .github/workflows/development.yaml | 119 +++++++++++++++++++++++ .github/workflows/master.yaml | 118 +++++++++++++++++++++++ .travis.yml | 48 ---------- Cargo.toml | 3 + src/data_sample_parser.rs | 143 ++++++++++++++++++---------- src/lib.rs | 1 + tests/samples/sample-0.2.1-dsp.json | 1 + tests/samples/sample-00-dsp.json | 2 +- tests/samples/sample-02.csv | 6 ++ 9 files changed, 344 insertions(+), 97 deletions(-) create mode 100644 .github/workflows/development.yaml create mode 100644 .github/workflows/master.yaml delete mode 100644 .travis.yml create mode 100644 tests/samples/sample-0.2.1-dsp.json create mode 100644 tests/samples/sample-02.csv diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml new file mode 100644 index 0000000..de5684e --- /dev/null +++ b/.github/workflows/development.yaml @@ -0,0 +1,119 @@ +name: Development + +on: + push: + branches: [ development ] + pull_request: + branches: [ development ] + +env: + CARGO_TERM_COLOR: always + +jobs: + check: + name: Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: check + + test: + name: Test Suite + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: test + + fmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add rustfmt + - uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all -- --check + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add clippy + - uses: actions-rs/cargo@v1 + with: + command: clippy + + audit: + name: Audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/audit-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + coveralls-grcov: + name: Code Coverage + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install stable toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + - name: Cargo clean + uses: actions-rs/cargo@v1 + with: + command: clean + - name: Gather coverage data + uses: actions-rs/tarpaulin@v0.1 + with: + version: '0.15.0' + out-type: 'Lcov' + run-types: Tests + args: '-- --test-threads 1' + - name: Coveralls upload + uses: coverallsapp/github-action@master + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + #coveralls-token: ${{ secrets.COVERALLS_TOKEN }} + path-to-lcov: lcov.info + parallel: true + + grcov_finalize: + name: Grcov Finalize + runs-on: ubuntu-latest + needs: coveralls-grcov + steps: + - name: Coveralls finalization + uses: coverallsapp/github-action@master + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + parallel-finished: true \ No newline at end of file diff --git a/.github/workflows/master.yaml b/.github/workflows/master.yaml new file mode 100644 index 0000000..1b35556 --- /dev/null +++ b/.github/workflows/master.yaml @@ -0,0 +1,118 @@ +name: Master + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +env: + CARGO_TERM_COLOR: always + +jobs: + check: + name: Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: check + + test: + name: Test Suite + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - uses: actions-rs/cargo@v1 + with: + command: test + + fmt: + name: Rustfmt + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add rustfmt + - uses: actions-rs/cargo@v1 + with: + command: fmt + args: --all -- --check + + clippy: + name: Clippy + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - run: rustup component add clippy + - uses: actions-rs/cargo@v1 + with: + command: clippy + + audit: + name: Audit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/audit-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + + coveralls-grcov: + name: Code Coverage + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Install stable toolchain + uses: actions-rs/toolchain@v1 + with: + toolchain: stable + override: true + - name: Cargo clean + uses: actions-rs/cargo@v1 + with: + command: clean + - name: Gather coverage data + uses: actions-rs/tarpaulin@v0.1 + with: + version: '0.15.0' + out-type: 'Lcov' + run-types: Tests + args: '-- --test-threads 1' + - name: Coveralls upload + uses: coverallsapp/github-action@master + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + path-to-lcov: lcov.info + parallel: true + + grcov_finalize: + name: Grcov Finalize + runs-on: ubuntu-latest + needs: coveralls-grcov + steps: + - name: Coveralls finalization + uses: coverallsapp/github-action@master + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + parallel-finished: true \ No newline at end of file diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 55e9813..0000000 --- a/.travis.yml +++ /dev/null @@ -1,48 +0,0 @@ -sudo: true -os: linux -language: rust -cache: cargo -rust: - - stable -matrix: - allow_failures: - - rust: nightly -before_script: - - | - pip install 'travis-cargo<0.2' --user && - export PATH=$HOME/.local/bin:$PATH -addons: - apt: - packages: - - binutils-dev - - libcurl4-openssl-dev - - libelf-dev - - libdw-dev - - libiberty-dev - - cmake - - gcc - - zlib1g-dev -script: - travis-cargo build && - travis-cargo test && - travis-cargo bench -- --no-run -after_success: - - | - wget https://github.com/SimonKagstrom/kcov/archive/master.tar.gz && - tar xzf master.tar.gz && - cd kcov-master && - mkdir build && - cd build && - cmake .. && - make && - sudo make install && - cd ../.. && - rm -rf kcov-master && - cargo clean && - cargo test --no-run && - for file in target/debug/*-*[^\.d]; do mkdir -p "target/cov/$(basename $file)"; kcov --exclude-pattern=/.cargo,/usr/lib --verify "target/cov/$(basename $file)" "$file"; done && - kcov --coveralls-id=$TRAVIS_JOB_ID --merge target/cov target/cov/* && - echo "Uploaded code coverage" -env: - global: - - TRAVIS_CARGO_NIGHTLY_FEATURE=nightly \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml index 61627a3..88a1454 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -40,6 +40,9 @@ rand = "0.7" crossbeam = "0.7" csv = "1.1" levenshtein = "1.0" +[dependencies.indexmap] +version = "1.7.0" +features = ["serde-1"] [profile.release] opt-level = 3 diff --git a/src/data_sample_parser.rs b/src/data_sample_parser.rs index ab3b605..f9e8906 100644 --- a/src/data_sample_parser.rs +++ b/src/data_sample_parser.rs @@ -74,7 +74,8 @@ //! ``` //! -use std::collections::BTreeMap; +// use std::collections::BTreeMap; +use indexmap::{IndexMap, serde_seq}; use crate::configs::Configs; use crate::Profile; use crate::engine::{Engine, EngineContainer}; @@ -89,12 +90,14 @@ use csv; use std::error::Error; use csv::WriterBuilder; use serde_json; +use serde_json::{Value}; use std::sync::mpsc::{Sender, Receiver}; use std::sync::mpsc; use std::thread; -type ProfilesMap = BTreeMap; +// type ProfilesMap = BTreeMap; +type ProfilesMap = IndexMap; #[derive(Serialize, Deserialize, Debug)] /// Represents the Parser for sample data to be used @@ -103,7 +106,8 @@ pub struct DataSampleParser{ pub issues: bool, /// Configs object that define the configuration settings cfg: Option, - /// List of Profiles objects identified by a unique profile name BTreeMap + /// List of Profiles objects identified by a unique profile name LinkedHashMap + #[serde(with = "indexmap::serde_seq")] profiles: ProfilesMap, } @@ -206,6 +210,23 @@ impl DataSampleParser { }, }; + let dsp: Value = serde_json::from_str(&serialized).unwrap(); + let prfils = dsp.get("profiles").unwrap(); + match prfils.is_array() { + true => { + println!("version 0.3.0"); + }, + false => { + println!("version 0.2.1"); + + let pm:ProfilesMap = ProfilesMap::new(); + + for prf in prfils.as_object().iter() { + pm.insert(prf.get("id").unwrap().to_string(), prf); + } + }, + } + serde_json::from_str(&serialized).unwrap() } @@ -265,48 +286,6 @@ impl DataSampleParser { // Multi-Threading END } - /// This function analyzes sample data that is a csv formatted file and returns a boolean if successful. - /// _NOTE:_ The csv properties are as follows: - /// + headers are included as first line - /// + double quote wrap text - /// + double quote escapes is enabled - /// + delimiter is a comma - /// - /// - /// # Arguments - /// - /// * `path: &String` - The full path name of the csv formatted sample data file.
- /// - /// # Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// let mut dsp = DataSampleParser::new(); - /// - /// assert_eq!(dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(),1); - /// } - /// ``` - pub fn analyze_csv_file(&mut self, path: &String) -> Result { - info!("Starting to analyzed the csv file {}",path); - - let mut file = (File::open(path).map_err(|e| { - error!("csv file {} couldn't be opened!",path); - e.to_string() - }))?; - - let mut data = String::new(); - file.read_to_string(&mut data).map_err(|e| { - error!("csv file {} couldn't be read!",path); - e.to_string() - }).unwrap(); - self.analyze_csv_data(&data) - } - /// This function analyzes sample data that is a csv formatted string and returns a boolean if successful. /// _NOTE:_ The csv properties are as follows: /// + headers are included as first line @@ -351,8 +330,8 @@ impl DataSampleParser { .from_reader(data.as_bytes()); //iterate through the headers - for headers in rdr.headers() { - for header in headers.iter() { + for headers in rdr.headers() { + for header in headers.iter(){ //add a Profile to the list of profiles to represent the field (indexed using the header label) let p = Profile::new_with_id(format!("{}",header)); self.profiles.insert(format!("{}",header), p); @@ -361,7 +340,6 @@ impl DataSampleParser { //create a Vec from all the keys (headers) in the profiles list let profile_keys: Vec<_> = self.profiles.keys().cloned().collect(); - //let mut rec_cnt: u16 = ::min_value(); debug!("CSV headers: {:?}",profile_keys); @@ -378,6 +356,49 @@ impl DataSampleParser { self.profiles.iter_mut().for_each(|p|p.1.pre_generate()); Ok(1) + } + + /// This function analyzes sample data that is a csv formatted file and returns a boolean if successful. + /// _NOTE:_ The csv properties are as follows: + /// + headers are included as first line + /// + double quote wrap text + /// + double quote escapes is enabled + /// + delimiter is a comma + /// + /// + /// # Arguments + /// + /// * `path: &String` - The full path name of the csv formatted sample data file.
+ /// + /// # Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// let mut dsp = DataSampleParser::new(); + /// + /// assert_eq!(dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(),1); + /// } + /// ``` + pub fn analyze_csv_file(&mut self, path: &String) -> Result { + info!("Starting to analyzed the csv file {}",path); + + let mut file = (File::open(path).map_err(|e| { + error!("csv file {} couldn't be opened!",path); + e.to_string() + }))?; + + let mut data = String::new(); + file.read_to_string(&mut data).map_err(|e| { + error!("csv file {} couldn't be read!",path); + e.to_string() + }).unwrap(); + + self.analyze_csv_data(&data) } /// This function generates date as strings using the a `demo` profile @@ -745,6 +766,16 @@ mod tests { let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-00-dsp")); println!("Sample data is [{:?}]", dsp.generate_record()[0]); + assert_eq!(dsp.generate_record()[0], "OK".to_string()); + } + + #[test] + // ensure the Data Sample Parser can be restored from archived file that + // was saved using version 0.2.1 + fn test_from_file_v021(){ + let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-0.2.1-dsp")); + println!("Sample data is [{:?}]", dsp.generate_record()[0]); + assert_eq!(dsp.generate_record()[0], "OK".to_string()); } @@ -759,6 +790,22 @@ mod tests { assert_eq!(headers.len(), 2); } + #[test] + // ensure the Data Sample Parser can read all the headers from teh csv file + fn test_read_headers_order(){ + let mut expected = Vec::new(); + expected.push("column-Z"); + expected.push("column-D",); + expected.push("column-A",); + expected.push("column-G"); + let mut dsp = DataSampleParser::new(); + + dsp.analyze_csv_file(&String::from("./tests/samples/sample-02.csv")).unwrap(); + let headers = dsp.extract_headers(); + + assert_eq!(headers, expected); + } + #[test] // ensure DataSampleParser can analyze a csv formatted file fn test_parse_csv_file(){ diff --git a/src/lib.rs b/src/lib.rs index d82f89b..82cc077 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -141,6 +141,7 @@ extern crate rand; extern crate crossbeam; extern crate csv; extern crate levenshtein; +extern crate indexmap; use crate::engine::{Fact, PatternDefinition}; use std::collections::BTreeMap; diff --git a/tests/samples/sample-0.2.1-dsp.json b/tests/samples/sample-0.2.1-dsp.json new file mode 100644 index 0000000..d9c86c6 --- /dev/null +++ b/tests/samples/sample-0.2.1-dsp.json @@ -0,0 +1 @@ +{"issues":false,"cfg":null,"profiles":{"status":{"id":"status","patterns":{"VC":1},"pattern_total":1,"pattern_keys":["VC"],"pattern_vals":[1],"pattern_percentages":[["VC",100.0]],"pattern_ranks":[["VC",100.0]],"sizes":{"2":1},"size_total":1,"size_ranks":[[2,100.0]],"processors":4,"facts":[[{"key":"O","prior_key":null,"next_key":"K","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0}],[{"key":"K","prior_key":"O","next_key":null,"pattern_placeholder":"C","starts_with":0,"ends_with":1,"index_offset":1}],[],[]]}}} \ No newline at end of file diff --git a/tests/samples/sample-00-dsp.json b/tests/samples/sample-00-dsp.json index d9c86c6..3f6146d 100644 --- a/tests/samples/sample-00-dsp.json +++ b/tests/samples/sample-00-dsp.json @@ -1 +1 @@ -{"issues":false,"cfg":null,"profiles":{"status":{"id":"status","patterns":{"VC":1},"pattern_total":1,"pattern_keys":["VC"],"pattern_vals":[1],"pattern_percentages":[["VC",100.0]],"pattern_ranks":[["VC",100.0]],"sizes":{"2":1},"size_total":1,"size_ranks":[[2,100.0]],"processors":4,"facts":[[{"key":"O","prior_key":null,"next_key":"K","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0}],[{"key":"K","prior_key":"O","next_key":null,"pattern_placeholder":"C","starts_with":0,"ends_with":1,"index_offset":1}],[],[]]}}} \ No newline at end of file +{"issues":false,"cfg":null,"profiles":[["status",{"id":"status","patterns":{"VC":1},"pattern_total":1,"pattern_keys":["VC"],"pattern_vals":[1],"pattern_percentages":[["VC",100.0]],"pattern_ranks":[["VC",100.0]],"sizes":{"2":1},"size_total":1,"size_ranks":[[2,100.0]],"processors":4,"facts":[[{"key":"O","prior_key":null,"next_key":"K","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0}],[{"key":"K","prior_key":"O","next_key":null,"pattern_placeholder":"C","starts_with":0,"ends_with":1,"index_offset":1}],[],[]]}]]} \ No newline at end of file diff --git a/tests/samples/sample-02.csv b/tests/samples/sample-02.csv new file mode 100644 index 0000000..b4c2e30 --- /dev/null +++ b/tests/samples/sample-02.csv @@ -0,0 +1,6 @@ +"column-Z","column-D","column-A","column-G" +"z102","dAAA1","999A",123456 +"z203","dBBB2","888A",234567 +"z304","dBBB3","777A",345678 +"z405","dBBB4","666A",456789 +"z501","dBBB5","555A",567890 \ No newline at end of file From ffb33eeec1a80a29bf3c0ad1a1ebb3c3a06b922d Mon Sep 17 00:00:00 2001 From: dsietz Date: Sat, 13 Nov 2021 19:14:51 -0500 Subject: [PATCH 02/13] issue #90 --- src/data_sample_parser.rs | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/data_sample_parser.rs b/src/data_sample_parser.rs index f9e8906..360c1f2 100644 --- a/src/data_sample_parser.rs +++ b/src/data_sample_parser.rs @@ -75,7 +75,8 @@ //! // use std::collections::BTreeMap; -use indexmap::{IndexMap, serde_seq}; +use indexmap::{IndexMap}; +use serde_json::map::Map; use crate::configs::Configs; use crate::Profile; use crate::engine::{Engine, EngineContainer}; @@ -90,7 +91,7 @@ use csv; use std::error::Error; use csv::WriterBuilder; use serde_json; -use serde_json::{Value}; +use serde_json::{json, Value}; use std::sync::mpsc::{Sender, Receiver}; use std::sync::mpsc; @@ -210,24 +211,34 @@ impl DataSampleParser { }, }; - let dsp: Value = serde_json::from_str(&serialized).unwrap(); + let mut dsp: Value = serde_json::from_str(&serialized).unwrap(); let prfils = dsp.get("profiles").unwrap(); match prfils.is_array() { true => { println!("version 0.3.0"); + return serde_json::from_str(&serialized).unwrap(); }, false => { println!("version 0.2.1"); - let pm:ProfilesMap = ProfilesMap::new(); + let mut pm:ProfilesMap = ProfilesMap::new(); for prf in prfils.as_object().iter() { - pm.insert(prf.get("id").unwrap().to_string(), prf); + println!("{:?}", prf); + let id = prf.get("id").unwrap().as_str().unwrap().to_string(); + let serl = &serde_json::to_string(prf).unwrap(); + println!("{:?} : {:?}",id, serl); + pm.insert(id, Profile::from_serialized(serl)); } + + let mut rtn = DataSampleParser::new(); + rtn.issues = false; + rtn.cfg = None; + rtn.profiles = pm; + + return rtn; }, } - - serde_json::from_str(&serialized).unwrap() } fn analyze_columns(&mut self, profile_keys: Vec, columns: Vec>) { From 29fa24bb7666edaf9d7b37377186d938500e6035 Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 11:30:25 -0500 Subject: [PATCH 03/13] issue #90 --- Cargo.toml | 2 +- src/data_sample_parser.rs | 80 +++++++++++++++++------ tests/samples/empty-dsp.json | 2 +- tests/samples/sample-0.2.1-dsp.json | 2 +- tests/samples/sample-0.2.1-nocfg-dsp.json | 1 + tests/samples/sample-01-dsp.json | 2 +- 6 files changed, 66 insertions(+), 23 deletions(-) create mode 100644 tests/samples/sample-0.2.1-nocfg-dsp.json diff --git a/Cargo.toml b/Cargo.toml index 88a1454..8d78c32 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "test-data-generation" -version = "0.2.1" +version = "0.3.0" edition = "2018" authors = ["dsietz "] repository = "https://github.com/dsietz/test-data-generation.git" diff --git a/src/data_sample_parser.rs b/src/data_sample_parser.rs index 360c1f2..9c55e4b 100644 --- a/src/data_sample_parser.rs +++ b/src/data_sample_parser.rs @@ -211,34 +211,50 @@ impl DataSampleParser { }, }; + // Support backwards compatibility for DSP saved using prior versions let mut dsp: Value = serde_json::from_str(&serialized).unwrap(); let prfils = dsp.get("profiles").unwrap(); + match prfils.is_array() { true => { - println!("version 0.3.0"); + debug!("Version 0.3.0 detected. Using latest version"); return serde_json::from_str(&serialized).unwrap(); }, false => { - println!("version 0.2.1"); + info!("Prior version 0.2.1 detected. Trying to upgrade to latest version"); - let mut pm:ProfilesMap = ProfilesMap::new(); - - for prf in prfils.as_object().iter() { - println!("{:?}", prf); - let id = prf.get("id").unwrap().as_str().unwrap().to_string(); - let serl = &serde_json::to_string(prf).unwrap(); - println!("{:?} : {:?}",id, serl); - pm.insert(id, Profile::from_serialized(serl)); - } + return Self::updgrade_to_latest_version(serialized); + }, + } + } - let mut rtn = DataSampleParser::new(); - rtn.issues = false; - rtn.cfg = None; - rtn.profiles = pm; + fn updgrade_to_latest_version(serialized: String) -> DataSampleParser { + let mut dsp: Value = serde_json::from_str(&serialized).unwrap(); + let prfils = dsp.get("profiles").unwrap(); + let mut pm:ProfilesMap = ProfilesMap::new(); + let issues = dsp.get("issues").unwrap().as_bool().unwrap(); + + for prf in prfils.as_object().iter() { + for attr in prf.keys() { + let id = prf.get(attr).unwrap().as_object().unwrap().get("id").unwrap().as_str().unwrap().to_string(); + let serl = &serde_json::to_string(prf.get(attr).unwrap()).unwrap(); + println!("{:?} : {:?}",id, serl); + pm.insert(id, Profile::from_serialized(serl)); + } + } - return rtn; + let mut rtn = match dsp.get("cfg").unwrap() { + Null => { + DataSampleParser::new() }, - } + _ => { + DataSampleParser::new_with(&dsp.get("cfg").unwrap().as_object().unwrap().get("file").unwrap().as_str().unwrap().to_string()) + }, + }; + + rtn.issues = issues; + rtn.profiles = pm; + return rtn; } fn analyze_columns(&mut self, profile_keys: Vec, columns: Vec>) { @@ -771,6 +787,22 @@ mod tests { use std::fs::File; use std::io::BufReader; + #[test] + // ensure a new Data Sample Parser can be created + fn test_new(){ + let dsp = DataSampleParser::new(); + + assert!(true); + } + + #[test] + // ensure a new Data Sample Parser can be created with configurations + fn test_new_with(){ + let dsp = DataSampleParser::new_with(&String::from("./config/tdg.yaml")); + + assert!(true); + } + #[test] // ensure the Data Sample Parser can be restored from archived file fn test_from_file(){ @@ -782,11 +814,21 @@ mod tests { #[test] // ensure the Data Sample Parser can be restored from archived file that - // was saved using version 0.2.1 - fn test_from_file_v021(){ + // was saved using version 0.2.1 using a configuration + fn test_from_file_v021_with_cfg(){ let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-0.2.1-dsp")); println!("Sample data is [{:?}]", dsp.generate_record()[0]); + assert_eq!(dsp.generate_record()[0], "OK".to_string()); + } + + #[test] + // ensure the Data Sample Parser can be restored from archived file that + // was saved using version 0.2.1 without a configuration + fn test_from_file_v021_no_cfg(){ + let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-0.2.1-nocfg-dsp")); + println!("Sample data is [{:?}]", dsp.generate_record()[0]); + assert_eq!(dsp.generate_record()[0], "OK".to_string()); } diff --git a/tests/samples/empty-dsp.json b/tests/samples/empty-dsp.json index 616e1b3..78bab75 100644 --- a/tests/samples/empty-dsp.json +++ b/tests/samples/empty-dsp.json @@ -1 +1 @@ -{"issues":false,"cfg":null,"profiles":{}} \ No newline at end of file +{"issues":false,"cfg":null,"profiles":[]} \ No newline at end of file diff --git a/tests/samples/sample-0.2.1-dsp.json b/tests/samples/sample-0.2.1-dsp.json index d9c86c6..c4e92d7 100644 --- a/tests/samples/sample-0.2.1-dsp.json +++ b/tests/samples/sample-0.2.1-dsp.json @@ -1 +1 @@ -{"issues":false,"cfg":null,"profiles":{"status":{"id":"status","patterns":{"VC":1},"pattern_total":1,"pattern_keys":["VC"],"pattern_vals":[1],"pattern_percentages":[["VC",100.0]],"pattern_ranks":[["VC",100.0]],"sizes":{"2":1},"size_total":1,"size_ranks":[[2,100.0]],"processors":4,"facts":[[{"key":"O","prior_key":null,"next_key":"K","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0}],[{"key":"K","prior_key":"O","next_key":null,"pattern_placeholder":"C","starts_with":0,"ends_with":1,"index_offset":1}],[],[]]}}} \ No newline at end of file +{"issues":false,"cfg":{"file":"./config/tdg.yaml"},"profiles":{"status":{"id":"status","patterns":{"VC":1},"pattern_total":1,"pattern_keys":["VC"],"pattern_vals":[1],"pattern_percentages":[["VC",100.0]],"pattern_ranks":[["VC",100.0]],"sizes":{"2":1},"size_total":1,"size_ranks":[[2,100.0]],"processors":4,"facts":[[{"key":"O","prior_key":null,"next_key":"K","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0}],[{"key":"K","prior_key":"O","next_key":null,"pattern_placeholder":"C","starts_with":0,"ends_with":1,"index_offset":1}],[],[]]}}} \ No newline at end of file diff --git a/tests/samples/sample-0.2.1-nocfg-dsp.json b/tests/samples/sample-0.2.1-nocfg-dsp.json new file mode 100644 index 0000000..d9c86c6 --- /dev/null +++ b/tests/samples/sample-0.2.1-nocfg-dsp.json @@ -0,0 +1 @@ +{"issues":false,"cfg":null,"profiles":{"status":{"id":"status","patterns":{"VC":1},"pattern_total":1,"pattern_keys":["VC"],"pattern_vals":[1],"pattern_percentages":[["VC",100.0]],"pattern_ranks":[["VC",100.0]],"sizes":{"2":1},"size_total":1,"size_ranks":[[2,100.0]],"processors":4,"facts":[[{"key":"O","prior_key":null,"next_key":"K","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0}],[{"key":"K","prior_key":"O","next_key":null,"pattern_placeholder":"C","starts_with":0,"ends_with":1,"index_offset":1}],[],[]]}}} \ No newline at end of file diff --git a/tests/samples/sample-01-dsp.json b/tests/samples/sample-01-dsp.json index bbf2bcc..32ef2e4 100644 --- a/tests/samples/sample-01-dsp.json +++ b/tests/samples/sample-01-dsp.json @@ -1 +1 @@ -{"issues":false,"cfg":null,"profiles":{"firstname":{"id":"firstname","patterns":{"Vccc":1,"Vccvc":1,"Vccvv":1,"Vvcvc":2},"pattern_total":5,"pattern_keys":["Vccc","Vccvc","Vccvv","Vvcvc"],"pattern_vals":[1,1,1,2],"pattern_percentages":[["Vvcvc",40.0],["Vccc",20.0],["Vccvc",20.0],["Vccvv",20.0]],"pattern_ranks":[["Vvcvc",40.0],["Vccc",60.0],["Vccvc",80.0],["Vccvv",100.0]],"sizes":{"4":1,"5":4},"size_total":5,"size_ranks":[[5,80.0],[4,100.0]],"processors":4,"facts":[[{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"y","prior_key":"e","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"n","prior_key":"o","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"n","prior_key":"o","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"e","prior_key":"i","next_key":null,"pattern_placeholder":"v","starts_with":0,"ends_with":1,"index_offset":4}],[{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1}],[{"key":"b","prior_key":"b","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"r","prior_key":"a","next_key":"o","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"b","next_key":"y","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"r","prior_key":"a","next_key":"o","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"b","next_key":"i","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2}],[{"key":"e","prior_key":"b","next_key":"y","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"o","prior_key":"r","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"y","prior_key":"b","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":3},{"key":"o","prior_key":"r","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"i","prior_key":"b","next_key":"e","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3}]]},"lastname":{"id":"lastname","patterns":{"Vvcc":1,"Vvccv":1,"Vvccvcc":1,"Vvcvcc":1,"Vvcvvcc":1},"pattern_total":5,"pattern_keys":["Vvcc","Vvccv","Vvccvcc","Vvcvcc","Vvcvvcc"],"pattern_vals":[1,1,1,1,1],"pattern_percentages":[["Vvcc",20.0],["Vvccv",20.0],["Vvccvcc",20.0],["Vvcvcc",20.0],["Vvcvvcc",20.0]],"pattern_ranks":[["Vvcc",20.0],["Vvccv",40.0],["Vvccvcc",60.0],["Vvcvcc",80.0],["Vvcvvcc",100.0]],"sizes":{"4":1,"5":1,"6":1,"7":2},"size_total":5,"size_ranks":[[7,40.0],[4,60.0],[5,80.0],[6,100.0]],"processors":4,"facts":[[{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"a","prior_key":"a","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"r","prior_key":"e","next_key":"g","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"a","prior_key":"l","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"e","prior_key":"r","next_key":null,"pattern_placeholder":"v","starts_with":0,"ends_with":1,"index_offset":4}],[{"key":"a","prior_key":"A","next_key":"g","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"r","prior_key":"a","next_key":"d","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":5},{"key":"a","prior_key":"A","next_key":"b","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"g","prior_key":"r","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":5},{"key":"a","prior_key":"A","next_key":"b","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"d","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"n","prior_key":"a","next_key":"d","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":5},{"key":"a","prior_key":"A","next_key":"k","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1}],[{"key":"g","prior_key":"a","next_key":"a","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"r","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":6},{"key":"b","prior_key":"a","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"a","next_key":"y","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"a","next_key":"l","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"n","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":6},{"key":"k","prior_key":"a","next_key":"r","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2}],[{"key":"a","prior_key":"g","next_key":"a","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"e","prior_key":"b","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"y","prior_key":"b","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":3},{"key":"l","prior_key":"d","next_key":"a","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":3},{"key":"r","prior_key":"k","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":3}]]}}} \ No newline at end of file +{"issues":false,"cfg":null,"profiles":[["firstname",{"id":"firstname","patterns":{"Vccc":1,"Vccvc":1,"Vccvv":1,"Vvcvc":2},"pattern_total":5,"pattern_keys":["Vccc","Vccvc","Vccvv","Vvcvc"],"pattern_vals":[1,1,1,2],"pattern_percentages":[["Vvcvc",40.0],["Vccc",20.0],["Vccvc",20.0],["Vccvv",20.0]],"pattern_ranks":[["Vvcvc",40.0],["Vccc",60.0],["Vccvc",80.0],["Vccvv",100.0]],"sizes":{"4":1,"5":4},"size_total":5,"size_ranks":[[5,80.0],[4,100.0]],"processors":4,"facts":[[{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"n","prior_key":"o","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"e","prior_key":"i","next_key":null,"pattern_placeholder":"v","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"n","prior_key":"o","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"y","prior_key":"e","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4}],[{"key":"a","prior_key":"A","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1},{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1}],[{"key":"r","prior_key":"a","next_key":"o","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"b","next_key":"y","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"b","next_key":"i","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"r","prior_key":"a","next_key":"o","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"b","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2}],[{"key":"o","prior_key":"r","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"y","prior_key":"b","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":3},{"key":"i","prior_key":"b","next_key":"e","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"o","prior_key":"r","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"e","prior_key":"b","next_key":"y","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3}]]}],["lastname",{"id":"lastname","patterns":{"Vvcc":1,"Vvccv":1,"Vvccvcc":1,"Vvcvcc":1,"Vvcvvcc":1},"pattern_total":5,"pattern_keys":["Vvcc","Vvccv","Vvccvcc","Vvcvcc","Vvcvvcc"],"pattern_vals":[1,1,1,1,1],"pattern_percentages":[["Vvcc",20.0],["Vvccv",20.0],["Vvccvcc",20.0],["Vvcvcc",20.0],["Vvcvvcc",20.0]],"pattern_ranks":[["Vvcc",20.0],["Vvccv",40.0],["Vvccvcc",60.0],["Vvcvcc",80.0],["Vvcvvcc",100.0]],"sizes":{"4":1,"5":1,"6":1,"7":2},"size_total":5,"size_ranks":[[7,40.0],[4,60.0],[5,80.0],[6,100.0]],"processors":4,"facts":[[{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"a","prior_key":"a","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"r","prior_key":"e","next_key":"g","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"e","prior_key":"r","next_key":null,"pattern_placeholder":"v","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"a","prior_key":"l","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":4}],[{"key":"a","prior_key":"A","next_key":"g","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"r","prior_key":"a","next_key":"d","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":5},{"key":"a","prior_key":"A","next_key":"b","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"g","prior_key":"r","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":5},{"key":"a","prior_key":"A","next_key":"k","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"b","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"d","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"n","prior_key":"a","next_key":"d","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":5}],[{"key":"g","prior_key":"a","next_key":"a","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"r","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":6},{"key":"b","prior_key":"a","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"k","prior_key":"a","next_key":"r","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"a","next_key":"y","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"a","next_key":"l","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"n","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":6}],[{"key":"a","prior_key":"g","next_key":"a","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"e","prior_key":"b","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"r","prior_key":"k","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":3},{"key":"y","prior_key":"b","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":3},{"key":"l","prior_key":"d","next_key":"a","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":3}]]}]]} \ No newline at end of file From 93550dfb92fe10adc83dfb6529540d088cc362de Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 11:41:57 -0500 Subject: [PATCH 04/13] Issue #90 --- Cargo.toml | 2 +- examples/01_demo.rs | 31 +- examples/02_demo.rs | 26 +- examples/03_demo.rs | 80 +- src/configs.rs | 395 +++--- src/data_sample_parser.rs | 2040 ++++++++++++++++-------------- src/engine/mod.rs | 672 +++++----- src/lib.rs | 1762 +++++++++++++------------- src/macros.rs | 213 ++-- src/shared.rs | 54 +- tests/engine.rs | 20 +- tests/integration_test.rs | 74 +- tests/performance_tests.rs | 66 +- tests/samples/sample-01-dsp.json | 2 +- 14 files changed, 2801 insertions(+), 2636 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8d78c32..ac1a148 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ maintenance = {status = "passively-maintained"} config = "0.10" lazy_static = "1.4" log = "0.4" -log4rs = "0.10" +log4rs = "1.0" serde = "1.0" serde_derive = "1.0" serde_json = "1.0" diff --git a/examples/01_demo.rs b/examples/01_demo.rs index cfcb442..44934dd 100644 --- a/examples/01_demo.rs +++ b/examples/01_demo.rs @@ -1,16 +1,15 @@ -extern crate test_data_generation; - -use test_data_generation::data_sample_parser::DataSampleParser; - -fn main() { - - // This example demonstrates the basic feature of the library to generate dates and people's names from the built-in demo data sets - // using demo sample that is included in the library. - - // initalize a new DataSampelParser - let dsp = DataSampleParser::new(); - - // generate some test data using the demo functions - println!("generate date:{}", dsp.demo_date()); - println!("generate person:{}", dsp.demo_person_name()); -} \ No newline at end of file +extern crate test_data_generation; + +use test_data_generation::data_sample_parser::DataSampleParser; + +fn main() { + // This example demonstrates the basic feature of the library to generate dates and people's names from the built-in demo data sets + // using demo sample that is included in the library. + + // initalize a new DataSampelParser + let dsp = DataSampleParser::new(); + + // generate some test data using the demo functions + println!("generate date:{}", dsp.demo_date()); + println!("generate person:{}", dsp.demo_person_name()); +} diff --git a/examples/02_demo.rs b/examples/02_demo.rs index 94f3135..d8f24bb 100644 --- a/examples/02_demo.rs +++ b/examples/02_demo.rs @@ -1,17 +1,21 @@ extern crate test_data_generation; use test_data_generation::data_sample_parser::DataSampleParser; - + fn main() { + // This example demonstrates the basic feature of the library to generate dates and people's names from a CSV file + // using sample data that is included in the library. + + // initalize a new DataSampelParser + let mut dsp = DataSampleParser::new(); - // This example demonstrates the basic feature of the library to generate dates and people's names from a CSV file - // using sample data that is included in the library. - - // initalize a new DataSampelParser - let mut dsp = DataSampleParser::new(); - - dsp.analyze_csv_file(&String::from("./tests/samples/sample-names.csv")).unwrap(); + dsp.analyze_csv_file(&String::from("./tests/samples/sample-names.csv")) + .unwrap(); - // generate some test data using the demo functions - println!("My new name is {} {}", dsp.generate_record()[0], dsp.generate_record()[1]); -} \ No newline at end of file + // generate some test data using the demo functions + println!( + "My new name is {} {}", + dsp.generate_record()[0], + dsp.generate_record()[1] + ); +} diff --git a/examples/03_demo.rs b/examples/03_demo.rs index 7650410..4127edd 100644 --- a/examples/03_demo.rs +++ b/examples/03_demo.rs @@ -1,52 +1,52 @@ extern crate test_data_generation; use std::path::Path; -use test_data_generation::{Profile}; - +use test_data_generation::Profile; + fn main() { - // This example demonstrates the ability to conitnuously add new analyzed data to an existing profile. - let profile_file = "./tests/samples/demo-03"; - let mut profile = match Path::new(profile_file).exists() { - true => { + // This example demonstrates the ability to conitnuously add new analyzed data to an existing profile. + let profile_file = "./tests/samples/demo-03"; + let mut profile = match Path::new(profile_file).exists() { + true => { // use existing file Profile::from_file(profile_file) - }, - false => { + } + false => { // create new file println!("Creating new profile: {}", profile_file); Profile::new_with_id("demo-03".to_string()) - }, - }; + } + }; - // analyze the first data set and save the profile. - profile.analyze("Jonny"); - profile.analyze("Jon"); - profile.analyze("Johnathon"); - profile.analyze("John"); - profile.analyze("Jonathon"); - profile.pre_generate(); - profile.save(&profile_file).unwrap(); - println!("My new name is {}", profile.generate().to_string()); + // analyze the first data set and save the profile. + profile.analyze("Jonny"); + profile.analyze("Jon"); + profile.analyze("Johnathon"); + profile.analyze("John"); + profile.analyze("Jonathon"); + profile.pre_generate(); + profile.save(&profile_file).unwrap(); + println!("My new name is {}", profile.generate().to_string()); - // analyze the second data set and add it to the saved profile. - let mut profile2 = Profile::from_file(profile_file); - profile2.analyze("Chris"); - profile2.analyze("Kris"); - profile2.analyze("Christopher"); - profile2.analyze("Christian"); - profile2.analyze("Krissy"); - profile2.pre_generate(); - profile2.save(&profile_file).unwrap(); - println!("My new name is {}", profile2.generate().to_string()); + // analyze the second data set and add it to the saved profile. + let mut profile2 = Profile::from_file(profile_file); + profile2.analyze("Chris"); + profile2.analyze("Kris"); + profile2.analyze("Christopher"); + profile2.analyze("Christian"); + profile2.analyze("Krissy"); + profile2.pre_generate(); + profile2.save(&profile_file).unwrap(); + println!("My new name is {}", profile2.generate().to_string()); - // analyze the third data set and add it to the saved profile. - let mut profile3 = Profile::from_file(profile_file); - profile3.analyze("Dan"); - profile3.analyze("Danny"); - profile3.analyze("Danyl"); - profile3.analyze("Dannie"); - profile3.analyze("Danathon"); - profile3.pre_generate(); - profile3.save(&profile_file).unwrap(); - println!("My new name is {}", profile3.generate().to_string()); -} \ No newline at end of file + // analyze the third data set and add it to the saved profile. + let mut profile3 = Profile::from_file(profile_file); + profile3.analyze("Dan"); + profile3.analyze("Danny"); + profile3.analyze("Danyl"); + profile3.analyze("Dannie"); + profile3.analyze("Danathon"); + profile3.pre_generate(); + profile3.save(&profile_file).unwrap(); + println!("My new name is {}", profile3.generate().to_string()); +} diff --git a/src/configs.rs b/src/configs.rs index 148a31e..188c84c 100644 --- a/src/configs.rs +++ b/src/configs.rs @@ -1,196 +1,199 @@ -//! The `configs` module provides functionality for the library to read configuration settings that the user can set in their implementation. -//! -//! # Examples -//! -//! -//! Generate some demo test data ... -//! -//! ``` -//! extern crate test_data_generation; -//! -//! use test_data_generation::configs::Configs; -//! -//! fn main() { -//! // initalize a new Configs -//! let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); -//! cfg.load_config_file(); -//! -//! // verify the configuration file has been loaded -//! println!("{:?}", cfg); -//! } -//! ``` - -//use std::path::Path; -use std::fs::File; -use std::io::prelude::*; -use yaml_rust::YamlLoader; -use serde_json; - -#[derive(Serialize, Deserialize, Debug)] -// Represents a Configs object that can be set by an implementation of the test data generation library -pub struct Configs{ - /// the file path of the test data generation library configuration file - file: String, -} - -impl Configs { - /// Constructs a new Configs - /// - /// # Arguments - /// - /// * `path: &String - The full path name (including the file name and extension) to the configuration file.
- /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::configs::Configs; - /// - /// fn main() { - /// // initalize a new Configs - /// let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); - /// cfg.load_config_file(); - /// - /// // verify the configuration file has been loaded - /// println!("{:?}", cfg); - /// } - /// ``` - pub fn new(path: &String) -> Configs { - let pth = path.to_string().to_owned(); - Configs{ - file: pth, - } - } - - /// Constructs a new Configs object from a serialized (JSON) string. This is used when restoring from "archive" - /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::configs::Configs; - /// - /// fn main() { - /// let serialized = "{\"file\":\"./tests/config/tdg.yaml\"}"; - /// let mut cfg = Configs::from_serialized(&serialized); - /// - /// assert_eq!(cfg.get_config_file_path(), "./tests/config/tdg.yaml"); - /// } - /// ``` - pub fn from_serialized(serialized: &str) -> Configs { - serde_json::from_str(&serialized).unwrap() - } - - /// Loads the configuration file using the path that was provided during calling a new Configs object - /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::configs::Configs; - /// - /// fn main() { - /// // initalize a new Configs - /// let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); - /// - /// // verify the configuration file path was set - /// println!("The configuration fiel is located at {}", cfg.get_config_file_path()); - /// } - /// ``` - pub fn get_config_file_path(&self) -> &str{ - &self.file - } - - /// Loads the configuration file using the path that was provided during calling a new Configs object - /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::configs::Configs; - /// - /// fn main() { - /// // initalize a new Configs - /// let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); - /// cfg.load_config_file(); - /// - /// // verify the configuration file has been loaded - /// println!("{:?}", cfg); - /// } - /// ``` - pub fn load_config_file(&mut self){ - let mut f = File::open(&self.file).expect(&format!("Error: Configuration file not found at {}", &self.file.to_string())); - let mut contents = String::new(); - f.read_to_string(&mut contents).expect("Something went wrong reading file"); - let _cfg_yaml = &YamlLoader::load_from_str(&*contents).expect("failed to load YAML file")[0]; - //println!("{:?}", cfg); - } - - /// This function converts the Configs object to a serialize JSON string. - /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::configs::Configs; - /// - /// fn main() { - /// //create a Configs object from a configuration file - /// let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); - /// cfg.load_config_file(); - /// - /// println!("{}", cfg.serialize()); - /// // {"key":"r","prior_key":null,"next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2} - /// } - /// - pub fn serialize(&mut self) ->String { - serde_json::to_string(&self).unwrap() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - // ensure Configs reads a valid configuration file - fn create_config_good_cfg_file(){ - let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); - - cfg.load_config_file(); - } - - #[test] - #[should_panic(expected = "Error: Configuration file not found at ./badpath/tdg.yaml")] - // ensure Configs errors when reading an invalid configuration file - fn create_config_bad_cfg_file(){ - let mut cfg = Configs::new(&String::from("./badpath/tdg.yaml")); - - cfg.load_config_file(); - } - - #[test] - fn new_fact_from_serialized(){ - let serialized = "{\"file\":\"./tests/config/tdg.yaml\"}"; - let cfg = Configs::from_serialized(&serialized); - - assert_eq!(cfg.get_config_file_path(), "./tests/config/tdg.yaml"); - } - - #[test] - // ensure a Configs object can be exported (to be archived) as JSON - fn serialize(){ - let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); - cfg.load_config_file(); - - let serialized = cfg.serialize(); - println!("serialized : {}",serialized); - - assert_eq!(serialized,"{\"file\":\"./tests/config/tdg.yaml\"}"); - } -} +//! The `configs` module provides functionality for the library to read configuration settings that the user can set in their implementation. +//! +//! # Examples +//! +//! +//! Generate some demo test data ... +//! +//! ``` +//! extern crate test_data_generation; +//! +//! use test_data_generation::configs::Configs; +//! +//! fn main() { +//! // initalize a new Configs +//! let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); +//! cfg.load_config_file(); +//! +//! // verify the configuration file has been loaded +//! println!("{:?}", cfg); +//! } +//! ``` + +//use std::path::Path; +use serde_json; +use std::fs::File; +use std::io::prelude::*; +use yaml_rust::YamlLoader; + +#[derive(Serialize, Deserialize, Debug)] +// Represents a Configs object that can be set by an implementation of the test data generation library +pub struct Configs { + /// the file path of the test data generation library configuration file + file: String, +} + +impl Configs { + /// Constructs a new Configs + /// + /// # Arguments + /// + /// * `path: &String - The full path name (including the file name and extension) to the configuration file.
+ /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::configs::Configs; + /// + /// fn main() { + /// // initalize a new Configs + /// let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); + /// cfg.load_config_file(); + /// + /// // verify the configuration file has been loaded + /// println!("{:?}", cfg); + /// } + /// ``` + pub fn new(path: &String) -> Configs { + let pth = path.to_string().to_owned(); + Configs { file: pth } + } + + /// Constructs a new Configs object from a serialized (JSON) string. This is used when restoring from "archive" + /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::configs::Configs; + /// + /// fn main() { + /// let serialized = "{\"file\":\"./tests/config/tdg.yaml\"}"; + /// let mut cfg = Configs::from_serialized(&serialized); + /// + /// assert_eq!(cfg.get_config_file_path(), "./tests/config/tdg.yaml"); + /// } + /// ``` + pub fn from_serialized(serialized: &str) -> Configs { + serde_json::from_str(&serialized).unwrap() + } + + /// Loads the configuration file using the path that was provided during calling a new Configs object + /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::configs::Configs; + /// + /// fn main() { + /// // initalize a new Configs + /// let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); + /// + /// // verify the configuration file path was set + /// println!("The configuration fiel is located at {}", cfg.get_config_file_path()); + /// } + /// ``` + pub fn get_config_file_path(&self) -> &str { + &self.file + } + + /// Loads the configuration file using the path that was provided during calling a new Configs object + /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::configs::Configs; + /// + /// fn main() { + /// // initalize a new Configs + /// let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); + /// cfg.load_config_file(); + /// + /// // verify the configuration file has been loaded + /// println!("{:?}", cfg); + /// } + /// ``` + pub fn load_config_file(&mut self) { + let mut f = File::open(&self.file).expect(&format!( + "Error: Configuration file not found at {}", + &self.file.to_string() + )); + let mut contents = String::new(); + f.read_to_string(&mut contents) + .expect("Something went wrong reading file"); + let _cfg_yaml = + &YamlLoader::load_from_str(&*contents).expect("failed to load YAML file")[0]; + //println!("{:?}", cfg); + } + + /// This function converts the Configs object to a serialize JSON string. + /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::configs::Configs; + /// + /// fn main() { + /// //create a Configs object from a configuration file + /// let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); + /// cfg.load_config_file(); + /// + /// println!("{}", cfg.serialize()); + /// // {"key":"r","prior_key":null,"next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2} + /// } + /// + pub fn serialize(&mut self) -> String { + serde_json::to_string(&self).unwrap() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + // ensure Configs reads a valid configuration file + fn create_config_good_cfg_file() { + let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); + + cfg.load_config_file(); + } + + #[test] + #[should_panic(expected = "Error: Configuration file not found at ./badpath/tdg.yaml")] + // ensure Configs errors when reading an invalid configuration file + fn create_config_bad_cfg_file() { + let mut cfg = Configs::new(&String::from("./badpath/tdg.yaml")); + + cfg.load_config_file(); + } + + #[test] + fn new_fact_from_serialized() { + let serialized = "{\"file\":\"./tests/config/tdg.yaml\"}"; + let cfg = Configs::from_serialized(&serialized); + + assert_eq!(cfg.get_config_file_path(), "./tests/config/tdg.yaml"); + } + + #[test] + // ensure a Configs object can be exported (to be archived) as JSON + fn serialize() { + let mut cfg = Configs::new(&String::from("./tests/config/tdg.yaml")); + cfg.load_config_file(); + + let serialized = cfg.serialize(); + println!("serialized : {}", serialized); + + assert_eq!(serialized, "{\"file\":\"./tests/config/tdg.yaml\"}"); + } +} diff --git a/src/data_sample_parser.rs b/src/data_sample_parser.rs index 9c55e4b..7cbc6b7 100644 --- a/src/data_sample_parser.rs +++ b/src/data_sample_parser.rs @@ -1,984 +1,1056 @@ -//! The `data_sample_parser` module provides functionality to read sample data, parse and analyze it, -//! so that test data can be generated based on profiles. -//! -//! # Examples -//! -//! -//! Generate some demo test data ... -//! -//! ``` -//! extern crate test_data_generation; -//! -//! use test_data_generation::data_sample_parser::DataSampleParser; -//! -//! fn main() { -//! // initalize a new DataSampelParser -//! let dsp = DataSampleParser::new(); -//! -//! // generate some test data using the demo functions -//! println!("generate date:{}", dsp.demo_date()); -//! println!("generate person:{}", dsp.demo_person_name()); -//! } -//! ``` -//! -//! Save the algorithm ... -//! -//! Archive (export) the data sample parser object so that you can reuse the algorithm to generate test data at a later time. -//! This enables you to persist the algorithm without having to store the actual data sample that was used to create the algorithm - -//! Which is important if you used 'real' data in your sample data. -//! -//! ``` -//! extern crate test_data_generation; -//! -//! use test_data_generation::data_sample_parser::DataSampleParser; -//! -//! fn main() { -//! // analyze the dataset -//! let mut dsp = DataSampleParser::new(); -//! -//! assert_eq!(dsp.save(&String::from("./tests/samples/empty-dsp")).unwrap(), true); -//! } -//! ``` -//! -//! Load an algorithm ... -//! -//! Create a data sample parser from a previously saved (exported) archive file so you can generate test data based on the algorithm.
-//! *NOTE:* In this example, there was only one data point in the data smaple that was analyzed (the word 'OK'). This was intentional -//! so the algorithm would be guaranteed to generate that same word. This was done ensure the assert_eq! returns true. -//! -//! ``` -//! extern crate test_data_generation; -//! -//! use test_data_generation::data_sample_parser::DataSampleParser; -//! -//! fn main() { -//! let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-00-dsp")); -//! -//! assert_eq!(dsp.generate_record()[0], "OK".to_string()); -//! } -//! ``` -//! -//! You can also generate a new csv file based on the data sample provided. -//! -//! ``` -//! extern crate test_data_generation; -//! -//! use test_data_generation::data_sample_parser::DataSampleParser; -//! -//! fn main() { -//! let mut dsp = DataSampleParser::new(); -//! -//! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); -//! dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")).unwrap(); -//! } -//! ``` -//! - -// use std::collections::BTreeMap; -use indexmap::{IndexMap}; -use serde_json::map::Map; -use crate::configs::Configs; -use crate::Profile; -use crate::engine::{Engine, EngineContainer}; -use crate::shared::CsvManipulator; -use std::fs::File; -use std::io; -use std::io::Write; -use std::io::prelude::*; -use std::result::Result; -use csv; -//use csv::StringRecord; -use std::error::Error; -use csv::WriterBuilder; -use serde_json; -use serde_json::{json, Value}; - -use std::sync::mpsc::{Sender, Receiver}; -use std::sync::mpsc; -use std::thread; - -// type ProfilesMap = BTreeMap; -type ProfilesMap = IndexMap; - -#[derive(Serialize, Deserialize, Debug)] -/// Represents the Parser for sample data to be used -pub struct DataSampleParser{ - /// indicates if there were issues parsing and anlyzing the data sample - pub issues: bool, - /// Configs object that define the configuration settings - cfg: Option, - /// List of Profiles objects identified by a unique profile name LinkedHashMap - #[serde(with = "indexmap::serde_seq")] - profiles: ProfilesMap, -} - -impl CsvManipulator for DataSampleParser {} -impl Engine for DataSampleParser {} - -impl DataSampleParser { - /// Constructs a new DataSampleParser - /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// let dsp = DataSampleParser::new(); - /// } - /// ``` - pub fn new() -> DataSampleParser { - - DataSampleParser{ - issues: false, - cfg: None, - profiles: ProfilesMap::new(), - } - } - - /// Constructs a new DataSampleParser - /// - /// # Arguments - /// - /// * `path: &String - The full path name (including the file name and extension) to the configuration file.
- /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// // param: the path to the configuration file - /// let dsp = DataSampleParser::new_with(&String::from("./config/tdg.yaml")); - /// } - /// ``` - pub fn new_with(path: &String) -> DataSampleParser { - DataSampleParser{ - issues: false, - cfg: Some(Configs::new(path)), - profiles: ProfilesMap::new(), - } - } - - /// Constructs a new DataSampleParser from an exported JSON file. This is used when restoring from "archive" - /// - /// # Arguments - /// - /// * `path: &String` - The full path name of the json formatted Data Sample Parser archive file.
- /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-00-dsp")); - /// - /// assert_eq!(dsp.generate_record()[0], "OK".to_string()); - /// } - /// ``` - pub fn from_file(path: &String) -> DataSampleParser { - // open the archive file - let mut file = match File::open(format!("{}.json",&path)) { - Err(_e) => { - error!("Could not open file {:?}", &path.to_string()); - panic!("Could not open file {:?}", &path.to_string()); - }, - Ok(f) => { - info!("Successfully opened file {:?}", &path.to_string()); - f - }, - }; - - //read the archive file - let mut serialized = String::new(); - match file.read_to_string(&mut serialized) { - Err(e) => { - error!("Could not read file {:?} because of {:?}", &path.to_string(), e.to_string()); - panic!("Could not read file {:?} because of {:?}", &path.to_string(), e.to_string()); - }, - Ok(s) => { - info!("Successfully read file {:?}", &path.to_string()); - s - }, - }; - - // Support backwards compatibility for DSP saved using prior versions - let mut dsp: Value = serde_json::from_str(&serialized).unwrap(); - let prfils = dsp.get("profiles").unwrap(); - - match prfils.is_array() { - true => { - debug!("Version 0.3.0 detected. Using latest version"); - return serde_json::from_str(&serialized).unwrap(); - }, - false => { - info!("Prior version 0.2.1 detected. Trying to upgrade to latest version"); - - return Self::updgrade_to_latest_version(serialized); - }, - } - } - - fn updgrade_to_latest_version(serialized: String) -> DataSampleParser { - let mut dsp: Value = serde_json::from_str(&serialized).unwrap(); - let prfils = dsp.get("profiles").unwrap(); - let mut pm:ProfilesMap = ProfilesMap::new(); - let issues = dsp.get("issues").unwrap().as_bool().unwrap(); - - for prf in prfils.as_object().iter() { - for attr in prf.keys() { - let id = prf.get(attr).unwrap().as_object().unwrap().get("id").unwrap().as_str().unwrap().to_string(); - let serl = &serde_json::to_string(prf.get(attr).unwrap()).unwrap(); - println!("{:?} : {:?}",id, serl); - pm.insert(id, Profile::from_serialized(serl)); - } - } - - let mut rtn = match dsp.get("cfg").unwrap() { - Null => { - DataSampleParser::new() - }, - _ => { - DataSampleParser::new_with(&dsp.get("cfg").unwrap().as_object().unwrap().get("file").unwrap().as_str().unwrap().to_string()) - }, - }; - - rtn.issues = issues; - rtn.profiles = pm; - return rtn; - } - - fn analyze_columns(&mut self, profile_keys: Vec, columns: Vec>) { - let col_cnt = columns.len(); - let (tx, rx): (Sender>, Receiver>) = mpsc::channel(); - let mut jobs = Vec::new(); - - //iterate through all the columns - for (idx, column) in columns.iter().enumerate() { - let thread_tx = tx.clone(); - let container = EngineContainer { - profile: self.profiles.get(&profile_keys[idx]).unwrap().clone(), - entities: column.to_vec(), - }; - - let job = thread::spawn(move || { - let result = Self::profile_entities_with_container(container); - thread_tx.send(result).unwrap(); - }); - - jobs.push(job); - } - - let mut results = Vec::with_capacity(col_cnt); - for _ in 0..col_cnt { - results.push(rx.recv()); - } - - for job in jobs { - job.join().expect("Error: Could not run the job"); - } - - for result in results { - match result { - Ok(msg) => { - //received from sender - match msg { - Ok(p) => { - let id = p.clone().id.unwrap(); - debug!("Profile {} has finished analyzing the entities.", id); - self.profiles.insert(id, p); - }, - Err(e) => { - error!("Profile wasn't able to analyzing the entities. Error: {}", e); - } - } - - }, - Err(e) => { - // could not receive from sender - error!("Receiver wasn't able to receive message from sender which was analyzing entities for the profile. Error: {}", e); - panic!("Receiver wasn't able to receive message from sender which was analyzing entities for the profile. Error: {}", e); - }, - } - } - // Multi-Threading END - } - - /// This function analyzes sample data that is a csv formatted string and returns a boolean if successful. - /// _NOTE:_ The csv properties are as follows: - /// + headers are included as first line - /// + double quote wrap text - /// + double quote escapes is enabled - /// + delimiter is a comma - /// - /// - /// # Arguments - /// - /// * `data: &String` - The textual content of a csv formatted sample data file.
- /// - /// # Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// let mut dsp = DataSampleParser::new(); - /// let mut data = String::from(""); - /// data.push_str("\"firstname\",\"lastname\"\n"); - /// data.push_str("\"Aaron\",\"Aaberg\"\n"); - /// data.push_str("\"Aaron\",\"Aaby\"\n"); - /// data.push_str("\"Abbey\",\"Aadland\"\n"); - /// data.push_str("\"Abbie\",\"Aagaard\"\n"); - /// data.push_str("\"Abby\",\"Aakre\""); - /// - /// assert_eq!(dsp.analyze_csv_data(&data).unwrap(),1); - /// } - /// ``` - pub fn analyze_csv_data(&mut self, data: &String) -> Result { - debug!("Starting to analyzed the csv data {}",data); - - let mut rdr = csv::ReaderBuilder::new() - .has_headers(true) - .quote(b'"') - .double_quote(true) - .delimiter(b',') - .from_reader(data.as_bytes()); - - //iterate through the headers - for headers in rdr.headers() { - for header in headers.iter(){ - //add a Profile to the list of profiles to represent the field (indexed using the header label) - let p = Profile::new_with_id(format!("{}",header)); - self.profiles.insert(format!("{}",header), p); - } - } - - //create a Vec from all the keys (headers) in the profiles list - let profile_keys: Vec<_> = self.profiles.keys().cloned().collect(); - - debug!("CSV headers: {:?}",profile_keys); - - // Multi-Threading START - let columns = Self::read_as_columns(rdr); - //let col_cnt = columns.len(); - let rec_cnt = columns[0].len(); - self.analyze_columns(profile_keys, columns); - - debug!("Successfully analyzed the csv data"); - debug!("Analyzed {} records, {} fields", rec_cnt, self.profiles.len()); - - //prepare the profiles for data generation - self.profiles.iter_mut().for_each(|p|p.1.pre_generate()); - - Ok(1) - } - - /// This function analyzes sample data that is a csv formatted file and returns a boolean if successful. - /// _NOTE:_ The csv properties are as follows: - /// + headers are included as first line - /// + double quote wrap text - /// + double quote escapes is enabled - /// + delimiter is a comma - /// - /// - /// # Arguments - /// - /// * `path: &String` - The full path name of the csv formatted sample data file.
- /// - /// # Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// let mut dsp = DataSampleParser::new(); - /// - /// assert_eq!(dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(),1); - /// } - /// ``` - pub fn analyze_csv_file(&mut self, path: &String) -> Result { - info!("Starting to analyzed the csv file {}",path); - - let mut file = (File::open(path).map_err(|e| { - error!("csv file {} couldn't be opened!",path); - e.to_string() - }))?; - - let mut data = String::new(); - file.read_to_string(&mut data).map_err(|e| { - error!("csv file {} couldn't be read!",path); - e.to_string() - }).unwrap(); - - self.analyze_csv_data(&data) - } - - /// This function generates date as strings using the a `demo` profile - /// - /// # Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// let dsp = DataSampleParser::new(); - /// - /// // generate some test data using the demo functions - /// println!("generate date:{}", dsp.demo_date()); - /// } - /// ``` - pub fn demo_date(&self) -> String{ - let mut profil = Profile::new(); - - profil.analyze("01/04/2017"); - profil.analyze("02/09/2017"); - profil.analyze("03/13/2017"); - profil.analyze("04/17/2017"); - profil.analyze("05/22/2017"); - profil.analyze("07/26/2017"); - profil.analyze("08/30/2017"); - profil.analyze("09/07/2017"); - profil.analyze("10/11/2017"); - profil.analyze("11/15/2017"); - profil.analyze("12/21/2017"); - profil.analyze("01/14/2016"); - profil.analyze("02/19/2016"); - profil.analyze("03/23/2016"); - profil.analyze("04/27/2016"); - profil.analyze("05/02/2016"); - profil.analyze("07/16/2015"); - profil.analyze("08/20/2015"); - profil.analyze("09/17/2015"); - profil.analyze("10/01/2014"); - profil.analyze("11/25/2014"); - profil.analyze("12/31/2018"); - - profil.pre_generate(); - //profil.apply_facts("##p##p####".to_string()) - profil.generate() - } - - /// This function generates people's names as strings using the a `demo` profile - /// - /// # Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// let dsp = DataSampleParser::new(); - /// - /// // generate some test data using the demo functions - /// println!("generate date:{}", dsp.demo_person_name()); - /// } - pub fn demo_person_name(&self) -> String{ - let mut profil = Profile::new(); - - profil.analyze("Smith, John"); - profil.analyze("O'Brien, Henny"); - profil.analyze("Dale, Danny"); - profil.analyze("Rickets, Ronnae"); - profil.analyze("Richard, Richie"); - profil.analyze("Roberts, Blake"); - profil.analyze("Conways, Sephen"); - - profil.pre_generate(); - profil.generate() - } - - /// This function returns a vector of header names - /// - /// # Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// let mut dsp = DataSampleParser::new(); - /// - /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - /// let headers = dsp.extract_headers(); - /// - /// assert_eq!(headers.len(), 2); - /// } - pub fn extract_headers(&mut self) -> Vec { - let mut headers = vec!(); - - for profile in self.profiles.iter_mut() { - headers.push(profile.0.to_string()); - } - - headers - } - - /// This function generates test data for the specified field name. - /// - /// # Arguments - /// - /// * `field: String` - The name of the field (e.g.: firstname) the represents the profile to use when generating the test data.
- /// - /// # Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// let mut dsp = DataSampleParser::new(); - /// - /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - /// println!("Generated data for first name {}",dsp.generate_by_field_name("firstname".to_string())); - /// } - /// ``` - pub fn generate_by_field_name(&mut self, field: String) -> String { - self.profiles.get_mut(&field).unwrap().generate().to_string() - } - - /// This function Vec of generates test data fields. - /// - /// # Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// let mut dsp = DataSampleParser::new(); - /// - /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - /// println!("Generated data record: {:?}",dsp.generate_record()); - /// } - /// ``` - pub fn generate_record(&mut self) -> Vec { - let mut record = Vec::new(); - - for profile in self.profiles.iter_mut() { - record.push(profile.1.generate().to_string()); - } - - record - } - - /// This function creates a csv file of generated test data. - /// Prior to calling this funciton, you need to call the analyze_csv_file() function. - /// _NOTE:_ The csv properties are as follows: - /// + headers are included as first line - /// + double quotes wrap text - /// + double quote escapes is enabled - /// + delimiter is a comma - /// - /// - /// # Arguments - /// - /// * `row_count: u32` - The number of rows to generate.
- /// * `path: &String` - The full path name where to save the csv file.
- /// - /// # Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// let mut dsp = DataSampleParser::new(); - /// - /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - /// dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")).unwrap(); - /// } - /// ``` - pub fn generate_csv(&mut self, row_count: u32, path: &String) -> Result<(), Box> { - info!("generating csv file {}", path); - - let mut wtr = (WriterBuilder::new() - .has_headers(true) - .quote(b'"') - .double_quote(true) - .delimiter(b',') - .from_path(path).map_err(|e| { - error!("csv file {} couldn't be created!",path); - e.to_string() - }))?; - - let headers = self.extract_headers(); - wtr.write_record(&headers)?; - - for _r in 0..row_count { - let mut record = Vec::new(); - - for profile in self.profiles.iter_mut() { - record.push(profile.1.generate()); - } - - wtr.write_record(&record)?; - } - - wtr.flush()?; - - Ok(()) - } - - /// This function calculates the levenshtein distance between 2 strings. - /// See: https://crates.io/crates/levenshtein - /// - /// # Arguments - /// - /// * `control: &String` - The string to compare against. This would be the real data from the data sample.
- /// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the distance.
- /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // analyze the dataset - /// let mut dsp = DataSampleParser::new(); - /// - /// assert_eq!(dsp.levenshtein_distance(&"kitten".to_string(), &"sitting".to_string()), 3 as usize); - /// } - /// - pub fn levenshtein_distance(&mut self, control: &String, experiment: &String) -> usize { - // https://docs.rs/levenshtein/1.0.3/levenshtein/fn.levenshtein.html - levenshtein_distance!(control, experiment) - } - - /// This function calculates the percent difference between 2 strings. - /// - /// # Arguments - /// - /// * `control: &String` - The string to compare against. This would be the real data from the data sample.
- /// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the percent difference.
- /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // analyze the dataset - /// let mut dsp = DataSampleParser::new(); - /// - /// assert_eq!(dsp.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64); - /// } - /// - pub fn realistic_test(&mut self, control: &String, experiment: &String) -> f64 { - //https://docs.rs/GSL/0.4.31/rgsl/statistics/fn.correlation.html - //http://www.statisticshowto.com/probability-and-statistics/correlation-coefficient-formula/ - // pearson's chi square test - // cosine similarity - http://blog.christianperone.com/2013/09/machine-learning-cosine-similarity-for-vector-space-models-part-iii/ - realistic_test!(control, experiment) - } - - /// This function returns a boolean that indicates if the data sample parsing had issues - /// - /// # Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // initalize a new DataSampelParser - /// // param: the path to the configuration file is wrong - /// let dsp = DataSampleParser::new_with(&String::from("./target/debug/config/tdg.yaml")); - /// - /// // generate some test data using the demo functions - /// assert_eq!(dsp.running_with_issues(), &false); - /// } - pub fn running_with_issues(&self) -> &bool{ - &self.issues - } - - /// This function saves (exports) the DataSampleParser to a JSON file. - /// This is useful when you wish to reuse the algorithm to generate more test data later. - /// - /// # Arguments - /// - /// * `field: &String` - The full path of the export file , excluding the file extension, (e.g.: "./test/data/custom-names").
- /// - /// #Errors - /// If this function encounters any form of I/O or other error, an error variant will be returned. - /// Otherwise, the function returns Ok(true).
- /// - /// #Example - /// - /// ``` - /// extern crate test_data_generation; - /// - /// use test_data_generation::data_sample_parser::DataSampleParser; - /// - /// fn main() { - /// // analyze the dataset - /// let mut dsp = DataSampleParser::new(); - /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-00.csv")).unwrap(); - /// - /// assert_eq!(dsp.save(&String::from("./tests/samples/sample-00-dsp")).unwrap(), true); - /// } - /// - pub fn save(&mut self, path: &String) -> Result { - let dsp_json = serde_json::to_string(&self).unwrap(); - - // Create the archive file - let mut file = match File::create(format!("{}.json",&path)) { - Err(e) => { - error!("Could not create file {:?}", &path.to_string()); - return Err(e); - }, - Ok(f) => { - info!("Successfully exported to {:?}", &path.to_string()); - f - }, - }; - - // Write the json string to file, returns io::Result<()> - match file.write_all(dsp_json.as_bytes()) { - Err(e) => { - error!("Could not write to file {}", &path.to_string()); - return Err(e); - }, - Ok(_) => { - info!("Successfully exported to {}", &path.to_string()); - }, - }; - - Ok(true) - } -} - - -#[cfg(test)] -mod tests { - use super::*; - use std::fs::File; - use std::io::BufReader; - - #[test] - // ensure a new Data Sample Parser can be created - fn test_new(){ - let dsp = DataSampleParser::new(); - - assert!(true); - } - - #[test] - // ensure a new Data Sample Parser can be created with configurations - fn test_new_with(){ - let dsp = DataSampleParser::new_with(&String::from("./config/tdg.yaml")); - - assert!(true); - } - - #[test] - // ensure the Data Sample Parser can be restored from archived file - fn test_from_file(){ - let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-00-dsp")); - println!("Sample data is [{:?}]", dsp.generate_record()[0]); - - assert_eq!(dsp.generate_record()[0], "OK".to_string()); - } - - #[test] - // ensure the Data Sample Parser can be restored from archived file that - // was saved using version 0.2.1 using a configuration - fn test_from_file_v021_with_cfg(){ - let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-0.2.1-dsp")); - println!("Sample data is [{:?}]", dsp.generate_record()[0]); - - assert_eq!(dsp.generate_record()[0], "OK".to_string()); - } - - #[test] - // ensure the Data Sample Parser can be restored from archived file that - // was saved using version 0.2.1 without a configuration - fn test_from_file_v021_no_cfg(){ - let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-0.2.1-nocfg-dsp")); - println!("Sample data is [{:?}]", dsp.generate_record()[0]); - - assert_eq!(dsp.generate_record()[0], "OK".to_string()); - } - - #[test] - // ensure the Data Sample Parser can read all the headers from teh csv file - fn test_read_headers(){ - let mut dsp = DataSampleParser::new(); - - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - let headers = dsp.extract_headers(); - - assert_eq!(headers.len(), 2); - } - - #[test] - // ensure the Data Sample Parser can read all the headers from teh csv file - fn test_read_headers_order(){ - let mut expected = Vec::new(); - expected.push("column-Z"); - expected.push("column-D",); - expected.push("column-A",); - expected.push("column-G"); - let mut dsp = DataSampleParser::new(); - - dsp.analyze_csv_file(&String::from("./tests/samples/sample-02.csv")).unwrap(); - let headers = dsp.extract_headers(); - - assert_eq!(headers, expected); - } - - #[test] - // ensure DataSampleParser can analyze a csv formatted file - fn test_parse_csv_file(){ - let mut dsp = DataSampleParser::new(); - - assert_eq!(dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(), 1); - } - - #[test] - // ensure DataSampleParser can analyze a csv formatted text - fn test_parse_csv_data(){ - let mut dsp = DataSampleParser::new(); - let mut data = String::from(""); - data.push_str("\"firstname\",\"lastname\"\n"); - data.push_str("\"Aaron\",\"Aaberg\"\n"); - data.push_str("\"Aaron\",\"Aaby\"\n"); - data.push_str("\"Abbey\",\"Aadland\"\n"); - data.push_str("\"Abbie\",\"Aagaard\"\n"); - data.push_str("\"Abby\",\"Aakre\""); - - assert_eq!(dsp.analyze_csv_data(&data).unwrap(), 1); - } - - #[test] - // ensure DataSampleParser can analyze a csv formatted file - fn test_generate_field_from_csv_file(){ - let mut dsp = DataSampleParser::new(); - - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - println!("Generated data for first name {}",dsp.generate_by_field_name("firstname".to_string())); - } - - #[test] - // ensure DataSampleParser can analyze a csv formatted file - fn test_generate_record_from_csv_file(){ - let mut dsp = DataSampleParser::new(); - - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - assert_eq!(dsp.generate_record().len(), 2); - } - - #[test] - // ensure DataSampleParser can analyze a csv formatted file - fn test_parse_csv_file_bad(){ - let mut dsp = DataSampleParser::new(); - - assert_eq!(dsp.analyze_csv_file(&String::from("./badpath/sample-01.csv")).is_err(), true); - } - - #[test] - // ensure the DataSampleParser object can be saved to file - fn test_save(){ - let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-00.csv")).unwrap(); - - assert_eq!(dsp.save(&String::from("./tests/samples/sample-00-dsp")).unwrap(), true); - } - - #[test] - // ensure the DataSampleParser object can recognize the difference between realistic data and unrealistic generated data - fn test_levenshtein_test(){ - let mut dsp = DataSampleParser::new(); - - assert_eq!(dsp.levenshtein_distance(&"kitten".to_string(), &"sitting".to_string()), 3 as usize); - } - - #[test] - // ensure the DataSampleParser object can recognize the difference between realistic data and unrealistic generated data - fn test_realistic_data_test(){ - let mut dsp = DataSampleParser::new(); - - assert_eq!(dsp.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64); - } - - #[test] - // demo test - fn test_demo(){ - let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - - println!("My new name is {} {}", dsp.generate_record()[0], dsp.generate_record()[1]); - - assert!(true); - } - - #[test] - // ensure the DataSampleParser object can generate test data as a csv file - fn test_extract_headers_from_sample(){ - let mut dsp = DataSampleParser::new(); - - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - let headers = dsp.extract_headers(); - - assert_eq!(headers.len(), 2); - } - - #[test] - // ensure the DataSampleParser object can generate test data as a csv file - fn test_generate_csv_test_data_from_sample(){ - let mut dsp = DataSampleParser::new(); - - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")).unwrap(); - - let generated_row_count = match File::open(format!("{}","./tests/samples/generated-01.csv")) { - Err(_e) => { - 0 - }, - Ok(f) => { - let mut count = 0; - let bf = BufReader::new(f); - - for _line in bf.lines() { - count += 1; - } - - count - }, - }; - - assert_eq!(generated_row_count, 101); - } -} +//! The `data_sample_parser` module provides functionality to read sample data, parse and analyze it, +//! so that test data can be generated based on profiles. +//! +//! # Examples +//! +//! +//! Generate some demo test data ... +//! +//! ``` +//! extern crate test_data_generation; +//! +//! use test_data_generation::data_sample_parser::DataSampleParser; +//! +//! fn main() { +//! // initalize a new DataSampelParser +//! let dsp = DataSampleParser::new(); +//! +//! // generate some test data using the demo functions +//! println!("generate date:{}", dsp.demo_date()); +//! println!("generate person:{}", dsp.demo_person_name()); +//! } +//! ``` +//! +//! Save the algorithm ... +//! +//! Archive (export) the data sample parser object so that you can reuse the algorithm to generate test data at a later time. +//! This enables you to persist the algorithm without having to store the actual data sample that was used to create the algorithm - +//! Which is important if you used 'real' data in your sample data. +//! +//! ``` +//! extern crate test_data_generation; +//! +//! use test_data_generation::data_sample_parser::DataSampleParser; +//! +//! fn main() { +//! // analyze the dataset +//! let mut dsp = DataSampleParser::new(); +//! +//! assert_eq!(dsp.save(&String::from("./tests/samples/empty-dsp")).unwrap(), true); +//! } +//! ``` +//! +//! Load an algorithm ... +//! +//! Create a data sample parser from a previously saved (exported) archive file so you can generate test data based on the algorithm.
+//! *NOTE:* In this example, there was only one data point in the data smaple that was analyzed (the word 'OK'). This was intentional +//! so the algorithm would be guaranteed to generate that same word. This was done ensure the assert_eq! returns true. +//! +//! ``` +//! extern crate test_data_generation; +//! +//! use test_data_generation::data_sample_parser::DataSampleParser; +//! +//! fn main() { +//! let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-00-dsp")); +//! +//! assert_eq!(dsp.generate_record()[0], "OK".to_string()); +//! } +//! ``` +//! +//! You can also generate a new csv file based on the data sample provided. +//! +//! ``` +//! extern crate test_data_generation; +//! +//! use test_data_generation::data_sample_parser::DataSampleParser; +//! +//! fn main() { +//! let mut dsp = DataSampleParser::new(); +//! +//! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); +//! dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")).unwrap(); +//! } +//! ``` +//! + +// use std::collections::BTreeMap; +use crate::configs::Configs; +use crate::engine::{Engine, EngineContainer}; +use crate::shared::CsvManipulator; +use crate::Profile; +use csv; +use indexmap::IndexMap; +use serde_json::map::Map; +use std::fs::File; +use std::io; +use std::io::prelude::*; +use std::io::Write; +use std::result::Result; +//use csv::StringRecord; +use csv::WriterBuilder; +use serde_json; +use serde_json::{json, Value}; +use std::error::Error; + +use std::sync::mpsc; +use std::sync::mpsc::{Receiver, Sender}; +use std::thread; + +// type ProfilesMap = BTreeMap; +type ProfilesMap = IndexMap; + +#[derive(Serialize, Deserialize, Debug)] +/// Represents the Parser for sample data to be used +pub struct DataSampleParser { + /// indicates if there were issues parsing and anlyzing the data sample + pub issues: bool, + /// Configs object that define the configuration settings + cfg: Option, + /// List of Profiles objects identified by a unique profile name LinkedHashMap + #[serde(with = "indexmap::serde_seq")] + profiles: ProfilesMap, +} + +impl CsvManipulator for DataSampleParser {} +impl Engine for DataSampleParser {} + +impl DataSampleParser { + /// Constructs a new DataSampleParser + /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// let dsp = DataSampleParser::new(); + /// } + /// ``` + pub fn new() -> DataSampleParser { + DataSampleParser { + issues: false, + cfg: None, + profiles: ProfilesMap::new(), + } + } + + /// Constructs a new DataSampleParser + /// + /// # Arguments + /// + /// * `path: &String - The full path name (including the file name and extension) to the configuration file.
+ /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// // param: the path to the configuration file + /// let dsp = DataSampleParser::new_with(&String::from("./config/tdg.yaml")); + /// } + /// ``` + pub fn new_with(path: &String) -> DataSampleParser { + DataSampleParser { + issues: false, + cfg: Some(Configs::new(path)), + profiles: ProfilesMap::new(), + } + } + + /// Constructs a new DataSampleParser from an exported JSON file. This is used when restoring from "archive" + /// + /// # Arguments + /// + /// * `path: &String` - The full path name of the json formatted Data Sample Parser archive file.
+ /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-00-dsp")); + /// + /// assert_eq!(dsp.generate_record()[0], "OK".to_string()); + /// } + /// ``` + pub fn from_file(path: &String) -> DataSampleParser { + // open the archive file + let mut file = match File::open(format!("{}.json", &path)) { + Err(_e) => { + error!("Could not open file {:?}", &path.to_string()); + panic!("Could not open file {:?}", &path.to_string()); + } + Ok(f) => { + info!("Successfully opened file {:?}", &path.to_string()); + f + } + }; + + //read the archive file + let mut serialized = String::new(); + match file.read_to_string(&mut serialized) { + Err(e) => { + error!( + "Could not read file {:?} because of {:?}", + &path.to_string(), + e.to_string() + ); + panic!( + "Could not read file {:?} because of {:?}", + &path.to_string(), + e.to_string() + ); + } + Ok(s) => { + info!("Successfully read file {:?}", &path.to_string()); + s + } + }; + + // Support backwards compatibility for DSP saved using prior versions + let mut dsp: Value = serde_json::from_str(&serialized).unwrap(); + let prfils = dsp.get("profiles").unwrap(); + + match prfils.is_array() { + true => { + debug!("Version 0.3.0 detected. Using latest version"); + return serde_json::from_str(&serialized).unwrap(); + } + false => { + info!("Prior version 0.2.1 detected. Trying to upgrade to latest version"); + + return Self::updgrade_to_latest_version(serialized); + } + } + } + + fn updgrade_to_latest_version(serialized: String) -> DataSampleParser { + let mut dsp: Value = serde_json::from_str(&serialized).unwrap(); + let prfils = dsp.get("profiles").unwrap(); + let mut pm: ProfilesMap = ProfilesMap::new(); + let issues = dsp.get("issues").unwrap().as_bool().unwrap(); + + for prf in prfils.as_object().iter() { + for attr in prf.keys() { + let id = prf + .get(attr) + .unwrap() + .as_object() + .unwrap() + .get("id") + .unwrap() + .as_str() + .unwrap() + .to_string(); + let serl = &serde_json::to_string(prf.get(attr).unwrap()).unwrap(); + println!("{:?} : {:?}", id, serl); + pm.insert(id, Profile::from_serialized(serl)); + } + } + + let mut rtn = match dsp.get("cfg").unwrap() { + Null => DataSampleParser::new(), + _ => DataSampleParser::new_with( + &dsp.get("cfg") + .unwrap() + .as_object() + .unwrap() + .get("file") + .unwrap() + .as_str() + .unwrap() + .to_string(), + ), + }; + + rtn.issues = issues; + rtn.profiles = pm; + return rtn; + } + + fn analyze_columns(&mut self, profile_keys: Vec, columns: Vec>) { + let col_cnt = columns.len(); + let (tx, rx): ( + Sender>, + Receiver>, + ) = mpsc::channel(); + let mut jobs = Vec::new(); + + //iterate through all the columns + for (idx, column) in columns.iter().enumerate() { + let thread_tx = tx.clone(); + let container = EngineContainer { + profile: self.profiles.get(&profile_keys[idx]).unwrap().clone(), + entities: column.to_vec(), + }; + + let job = thread::spawn(move || { + let result = Self::profile_entities_with_container(container); + thread_tx.send(result).unwrap(); + }); + + jobs.push(job); + } + + let mut results = Vec::with_capacity(col_cnt); + for _ in 0..col_cnt { + results.push(rx.recv()); + } + + for job in jobs { + job.join().expect("Error: Could not run the job"); + } + + for result in results { + match result { + Ok(msg) => { + //received from sender + match msg { + Ok(p) => { + let id = p.clone().id.unwrap(); + debug!("Profile {} has finished analyzing the entities.", id); + self.profiles.insert(id, p); + } + Err(e) => { + error!( + "Profile wasn't able to analyzing the entities. Error: {}", + e + ); + } + } + } + Err(e) => { + // could not receive from sender + error!("Receiver wasn't able to receive message from sender which was analyzing entities for the profile. Error: {}", e); + panic!("Receiver wasn't able to receive message from sender which was analyzing entities for the profile. Error: {}", e); + } + } + } + // Multi-Threading END + } + + /// This function analyzes sample data that is a csv formatted string and returns a boolean if successful. + /// _NOTE:_ The csv properties are as follows: + /// + headers are included as first line + /// + double quote wrap text + /// + double quote escapes is enabled + /// + delimiter is a comma + /// + /// + /// # Arguments + /// + /// * `data: &String` - The textual content of a csv formatted sample data file.
+ /// + /// # Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// let mut dsp = DataSampleParser::new(); + /// let mut data = String::from(""); + /// data.push_str("\"firstname\",\"lastname\"\n"); + /// data.push_str("\"Aaron\",\"Aaberg\"\n"); + /// data.push_str("\"Aaron\",\"Aaby\"\n"); + /// data.push_str("\"Abbey\",\"Aadland\"\n"); + /// data.push_str("\"Abbie\",\"Aagaard\"\n"); + /// data.push_str("\"Abby\",\"Aakre\""); + /// + /// assert_eq!(dsp.analyze_csv_data(&data).unwrap(),1); + /// } + /// ``` + pub fn analyze_csv_data(&mut self, data: &String) -> Result { + debug!("Starting to analyzed the csv data {}", data); + + let mut rdr = csv::ReaderBuilder::new() + .has_headers(true) + .quote(b'"') + .double_quote(true) + .delimiter(b',') + .from_reader(data.as_bytes()); + + //iterate through the headers + for headers in rdr.headers() { + for header in headers.iter() { + //add a Profile to the list of profiles to represent the field (indexed using the header label) + let p = Profile::new_with_id(format!("{}", header)); + self.profiles.insert(format!("{}", header), p); + } + } + + //create a Vec from all the keys (headers) in the profiles list + let profile_keys: Vec<_> = self.profiles.keys().cloned().collect(); + + debug!("CSV headers: {:?}", profile_keys); + + // Multi-Threading START + let columns = Self::read_as_columns(rdr); + //let col_cnt = columns.len(); + let rec_cnt = columns[0].len(); + self.analyze_columns(profile_keys, columns); + + debug!("Successfully analyzed the csv data"); + debug!( + "Analyzed {} records, {} fields", + rec_cnt, + self.profiles.len() + ); + + //prepare the profiles for data generation + self.profiles.iter_mut().for_each(|p| p.1.pre_generate()); + + Ok(1) + } + + /// This function analyzes sample data that is a csv formatted file and returns a boolean if successful. + /// _NOTE:_ The csv properties are as follows: + /// + headers are included as first line + /// + double quote wrap text + /// + double quote escapes is enabled + /// + delimiter is a comma + /// + /// + /// # Arguments + /// + /// * `path: &String` - The full path name of the csv formatted sample data file.
+ /// + /// # Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// let mut dsp = DataSampleParser::new(); + /// + /// assert_eq!(dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(),1); + /// } + /// ``` + pub fn analyze_csv_file(&mut self, path: &String) -> Result { + info!("Starting to analyzed the csv file {}", path); + + let mut file = (File::open(path).map_err(|e| { + error!("csv file {} couldn't be opened!", path); + e.to_string() + }))?; + + let mut data = String::new(); + file.read_to_string(&mut data) + .map_err(|e| { + error!("csv file {} couldn't be read!", path); + e.to_string() + }) + .unwrap(); + + self.analyze_csv_data(&data) + } + + /// This function generates date as strings using the a `demo` profile + /// + /// # Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// let dsp = DataSampleParser::new(); + /// + /// // generate some test data using the demo functions + /// println!("generate date:{}", dsp.demo_date()); + /// } + /// ``` + pub fn demo_date(&self) -> String { + let mut profil = Profile::new(); + + profil.analyze("01/04/2017"); + profil.analyze("02/09/2017"); + profil.analyze("03/13/2017"); + profil.analyze("04/17/2017"); + profil.analyze("05/22/2017"); + profil.analyze("07/26/2017"); + profil.analyze("08/30/2017"); + profil.analyze("09/07/2017"); + profil.analyze("10/11/2017"); + profil.analyze("11/15/2017"); + profil.analyze("12/21/2017"); + profil.analyze("01/14/2016"); + profil.analyze("02/19/2016"); + profil.analyze("03/23/2016"); + profil.analyze("04/27/2016"); + profil.analyze("05/02/2016"); + profil.analyze("07/16/2015"); + profil.analyze("08/20/2015"); + profil.analyze("09/17/2015"); + profil.analyze("10/01/2014"); + profil.analyze("11/25/2014"); + profil.analyze("12/31/2018"); + + profil.pre_generate(); + //profil.apply_facts("##p##p####".to_string()) + profil.generate() + } + + /// This function generates people's names as strings using the a `demo` profile + /// + /// # Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// let dsp = DataSampleParser::new(); + /// + /// // generate some test data using the demo functions + /// println!("generate date:{}", dsp.demo_person_name()); + /// } + pub fn demo_person_name(&self) -> String { + let mut profil = Profile::new(); + + profil.analyze("Smith, John"); + profil.analyze("O'Brien, Henny"); + profil.analyze("Dale, Danny"); + profil.analyze("Rickets, Ronnae"); + profil.analyze("Richard, Richie"); + profil.analyze("Roberts, Blake"); + profil.analyze("Conways, Sephen"); + + profil.pre_generate(); + profil.generate() + } + + /// This function returns a vector of header names + /// + /// # Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// let mut dsp = DataSampleParser::new(); + /// + /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); + /// let headers = dsp.extract_headers(); + /// + /// assert_eq!(headers.len(), 2); + /// } + pub fn extract_headers(&mut self) -> Vec { + let mut headers = vec![]; + + for profile in self.profiles.iter_mut() { + headers.push(profile.0.to_string()); + } + + headers + } + + /// This function generates test data for the specified field name. + /// + /// # Arguments + /// + /// * `field: String` - The name of the field (e.g.: firstname) the represents the profile to use when generating the test data.
+ /// + /// # Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// let mut dsp = DataSampleParser::new(); + /// + /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); + /// println!("Generated data for first name {}",dsp.generate_by_field_name("firstname".to_string())); + /// } + /// ``` + pub fn generate_by_field_name(&mut self, field: String) -> String { + self.profiles + .get_mut(&field) + .unwrap() + .generate() + .to_string() + } + + /// This function Vec of generates test data fields. + /// + /// # Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// let mut dsp = DataSampleParser::new(); + /// + /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); + /// println!("Generated data record: {:?}",dsp.generate_record()); + /// } + /// ``` + pub fn generate_record(&mut self) -> Vec { + let mut record = Vec::new(); + + for profile in self.profiles.iter_mut() { + record.push(profile.1.generate().to_string()); + } + + record + } + + /// This function creates a csv file of generated test data. + /// Prior to calling this funciton, you need to call the analyze_csv_file() function. + /// _NOTE:_ The csv properties are as follows: + /// + headers are included as first line + /// + double quotes wrap text + /// + double quote escapes is enabled + /// + delimiter is a comma + /// + /// + /// # Arguments + /// + /// * `row_count: u32` - The number of rows to generate.
+ /// * `path: &String` - The full path name where to save the csv file.
+ /// + /// # Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// let mut dsp = DataSampleParser::new(); + /// + /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); + /// dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")).unwrap(); + /// } + /// ``` + pub fn generate_csv(&mut self, row_count: u32, path: &String) -> Result<(), Box> { + info!("generating csv file {}", path); + + let mut wtr = (WriterBuilder::new() + .has_headers(true) + .quote(b'"') + .double_quote(true) + .delimiter(b',') + .from_path(path) + .map_err(|e| { + error!("csv file {} couldn't be created!", path); + e.to_string() + }))?; + + let headers = self.extract_headers(); + wtr.write_record(&headers)?; + + for _r in 0..row_count { + let mut record = Vec::new(); + + for profile in self.profiles.iter_mut() { + record.push(profile.1.generate()); + } + + wtr.write_record(&record)?; + } + + wtr.flush()?; + + Ok(()) + } + + /// This function calculates the levenshtein distance between 2 strings. + /// See: https://crates.io/crates/levenshtein + /// + /// # Arguments + /// + /// * `control: &String` - The string to compare against. This would be the real data from the data sample.
+ /// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the distance.
+ /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // analyze the dataset + /// let mut dsp = DataSampleParser::new(); + /// + /// assert_eq!(dsp.levenshtein_distance(&"kitten".to_string(), &"sitting".to_string()), 3 as usize); + /// } + /// + pub fn levenshtein_distance(&mut self, control: &String, experiment: &String) -> usize { + // https://docs.rs/levenshtein/1.0.3/levenshtein/fn.levenshtein.html + levenshtein_distance!(control, experiment) + } + + /// This function calculates the percent difference between 2 strings. + /// + /// # Arguments + /// + /// * `control: &String` - The string to compare against. This would be the real data from the data sample.
+ /// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the percent difference.
+ /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // analyze the dataset + /// let mut dsp = DataSampleParser::new(); + /// + /// assert_eq!(dsp.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64); + /// } + /// + pub fn realistic_test(&mut self, control: &String, experiment: &String) -> f64 { + //https://docs.rs/GSL/0.4.31/rgsl/statistics/fn.correlation.html + //http://www.statisticshowto.com/probability-and-statistics/correlation-coefficient-formula/ + // pearson's chi square test + // cosine similarity - http://blog.christianperone.com/2013/09/machine-learning-cosine-similarity-for-vector-space-models-part-iii/ + realistic_test!(control, experiment) + } + + /// This function returns a boolean that indicates if the data sample parsing had issues + /// + /// # Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // initalize a new DataSampelParser + /// // param: the path to the configuration file is wrong + /// let dsp = DataSampleParser::new_with(&String::from("./target/debug/config/tdg.yaml")); + /// + /// // generate some test data using the demo functions + /// assert_eq!(dsp.running_with_issues(), &false); + /// } + pub fn running_with_issues(&self) -> &bool { + &self.issues + } + + /// This function saves (exports) the DataSampleParser to a JSON file. + /// This is useful when you wish to reuse the algorithm to generate more test data later. + /// + /// # Arguments + /// + /// * `field: &String` - The full path of the export file , excluding the file extension, (e.g.: "./test/data/custom-names").
+ /// + /// #Errors + /// If this function encounters any form of I/O or other error, an error variant will be returned. + /// Otherwise, the function returns Ok(true).
+ /// + /// #Example + /// + /// ``` + /// extern crate test_data_generation; + /// + /// use test_data_generation::data_sample_parser::DataSampleParser; + /// + /// fn main() { + /// // analyze the dataset + /// let mut dsp = DataSampleParser::new(); + /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-00.csv")).unwrap(); + /// + /// assert_eq!(dsp.save(&String::from("./tests/samples/sample-00-dsp")).unwrap(), true); + /// } + /// + pub fn save(&mut self, path: &String) -> Result { + let dsp_json = serde_json::to_string(&self).unwrap(); + + // Create the archive file + let mut file = match File::create(format!("{}.json", &path)) { + Err(e) => { + error!("Could not create file {:?}", &path.to_string()); + return Err(e); + } + Ok(f) => { + info!("Successfully exported to {:?}", &path.to_string()); + f + } + }; + + // Write the json string to file, returns io::Result<()> + match file.write_all(dsp_json.as_bytes()) { + Err(e) => { + error!("Could not write to file {}", &path.to_string()); + return Err(e); + } + Ok(_) => { + info!("Successfully exported to {}", &path.to_string()); + } + }; + + Ok(true) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::File; + use std::io::BufReader; + + #[test] + // ensure a new Data Sample Parser can be created + fn test_new() { + let dsp = DataSampleParser::new(); + + assert!(true); + } + + #[test] + // ensure a new Data Sample Parser can be created with configurations + fn test_new_with() { + let dsp = DataSampleParser::new_with(&String::from("./config/tdg.yaml")); + + assert!(true); + } + + #[test] + // ensure the Data Sample Parser can be restored from archived file + fn test_from_file() { + let mut dsp = DataSampleParser::from_file(&String::from("./tests/samples/sample-00-dsp")); + println!("Sample data is [{:?}]", dsp.generate_record()[0]); + + assert_eq!(dsp.generate_record()[0], "OK".to_string()); + } + + #[test] + // ensure the Data Sample Parser can be restored from archived file that + // was saved using version 0.2.1 using a configuration + fn test_from_file_v021_with_cfg() { + let mut dsp = + DataSampleParser::from_file(&String::from("./tests/samples/sample-0.2.1-dsp")); + println!("Sample data is [{:?}]", dsp.generate_record()[0]); + + assert_eq!(dsp.generate_record()[0], "OK".to_string()); + } + + #[test] + // ensure the Data Sample Parser can be restored from archived file that + // was saved using version 0.2.1 without a configuration + fn test_from_file_v021_no_cfg() { + let mut dsp = + DataSampleParser::from_file(&String::from("./tests/samples/sample-0.2.1-nocfg-dsp")); + println!("Sample data is [{:?}]", dsp.generate_record()[0]); + + assert_eq!(dsp.generate_record()[0], "OK".to_string()); + } + + #[test] + // ensure the Data Sample Parser can read all the headers from teh csv file + fn test_read_headers() { + let mut dsp = DataSampleParser::new(); + + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + .unwrap(); + let headers = dsp.extract_headers(); + + assert_eq!(headers.len(), 2); + } + + #[test] + // ensure the Data Sample Parser can read all the headers from teh csv file + fn test_read_headers_order() { + let mut expected = Vec::new(); + expected.push("column-Z"); + expected.push("column-D"); + expected.push("column-A"); + expected.push("column-G"); + let mut dsp = DataSampleParser::new(); + + dsp.analyze_csv_file(&String::from("./tests/samples/sample-02.csv")) + .unwrap(); + let headers = dsp.extract_headers(); + + assert_eq!(headers, expected); + } + + #[test] + // ensure DataSampleParser can analyze a csv formatted file + fn test_parse_csv_file() { + let mut dsp = DataSampleParser::new(); + + assert_eq!( + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + .unwrap(), + 1 + ); + } + + #[test] + // ensure DataSampleParser can analyze a csv formatted text + fn test_parse_csv_data() { + let mut dsp = DataSampleParser::new(); + let mut data = String::from(""); + data.push_str("\"firstname\",\"lastname\"\n"); + data.push_str("\"Aaron\",\"Aaberg\"\n"); + data.push_str("\"Aaron\",\"Aaby\"\n"); + data.push_str("\"Abbey\",\"Aadland\"\n"); + data.push_str("\"Abbie\",\"Aagaard\"\n"); + data.push_str("\"Abby\",\"Aakre\""); + + assert_eq!(dsp.analyze_csv_data(&data).unwrap(), 1); + } + + #[test] + // ensure DataSampleParser can analyze a csv formatted file + fn test_generate_field_from_csv_file() { + let mut dsp = DataSampleParser::new(); + + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + .unwrap(); + println!( + "Generated data for first name {}", + dsp.generate_by_field_name("firstname".to_string()) + ); + } + + #[test] + // ensure DataSampleParser can analyze a csv formatted file + fn test_generate_record_from_csv_file() { + let mut dsp = DataSampleParser::new(); + + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + .unwrap(); + assert_eq!(dsp.generate_record().len(), 2); + } + + #[test] + // ensure DataSampleParser can analyze a csv formatted file + fn test_parse_csv_file_bad() { + let mut dsp = DataSampleParser::new(); + + assert_eq!( + dsp.analyze_csv_file(&String::from("./badpath/sample-01.csv")) + .is_err(), + true + ); + } + + #[test] + // ensure the DataSampleParser object can be saved to file + fn test_save() { + let mut dsp = DataSampleParser::new(); + dsp.analyze_csv_file(&String::from("./tests/samples/sample-00.csv")) + .unwrap(); + + assert_eq!( + dsp.save(&String::from("./tests/samples/sample-00-dsp")) + .unwrap(), + true + ); + } + + #[test] + // ensure the DataSampleParser object can recognize the difference between realistic data and unrealistic generated data + fn test_levenshtein_test() { + let mut dsp = DataSampleParser::new(); + + assert_eq!( + dsp.levenshtein_distance(&"kitten".to_string(), &"sitting".to_string()), + 3 as usize + ); + } + + #[test] + // ensure the DataSampleParser object can recognize the difference between realistic data and unrealistic generated data + fn test_realistic_data_test() { + let mut dsp = DataSampleParser::new(); + + assert_eq!( + dsp.realistic_test(&"kitten".to_string(), &"sitting".to_string()), + 76.92307692307692 as f64 + ); + } + + #[test] + // demo test + fn test_demo() { + let mut dsp = DataSampleParser::new(); + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + .unwrap(); + + println!( + "My new name is {} {}", + dsp.generate_record()[0], + dsp.generate_record()[1] + ); + + assert!(true); + } + + #[test] + // ensure the DataSampleParser object can generate test data as a csv file + fn test_extract_headers_from_sample() { + let mut dsp = DataSampleParser::new(); + + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + .unwrap(); + let headers = dsp.extract_headers(); + + assert_eq!(headers.len(), 2); + } + + #[test] + // ensure the DataSampleParser object can generate test data as a csv file + fn test_generate_csv_test_data_from_sample() { + let mut dsp = DataSampleParser::new(); + + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + .unwrap(); + dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")) + .unwrap(); + + let generated_row_count = + match File::open(format!("{}", "./tests/samples/generated-01.csv")) { + Err(_e) => 0, + Ok(f) => { + let mut count = 0; + let bf = BufReader::new(f); + + for _line in bf.lines() { + count += 1; + } + + count + } + }; + + assert_eq!(generated_row_count, 101); + } +} diff --git a/src/engine/mod.rs b/src/engine/mod.rs index 5d55c06..dcd7133 100644 --- a/src/engine/mod.rs +++ b/src/engine/mod.rs @@ -1,9 +1,9 @@ -//! -//! +//! +//! //! # Fact -//! The Fact object is a representation of a character based on its context within a data entity. +//! The Fact object is a representation of a character based on its context within a data entity. //! Facts are created during the analyze process and then later used to generate data from the algorithm. -//! +//! //! ## Example //! //! ```rust @@ -22,7 +22,7 @@ //! fact.set_prior_key('o'); //! } //! ``` -//! +//! //! # PatternDefinition //! The PatternDefinition provides functionality to retrieve symbols that are used in defining a pattern. //! @@ -43,7 +43,7 @@ //! extern crate test_data_generation; //! //! use test_data_generation::engine::PatternDefinition; -//! +//! //! fn main() { //! let pttrn_def = PatternDefinition::new(); //! println!("Upper case vowel symbol: {:?}", pttrn_def.get(&"VowelUpper".to_string())); @@ -53,85 +53,85 @@ use regex::Regex; use serde_json; use std::collections::BTreeMap; -use std::sync::mpsc::{Sender, Receiver}; use std::sync::mpsc; +use std::sync::mpsc::{Receiver, Sender}; use std::thread; use crate::Profile; //use async_trait::async_trait; #[allow(dead_code)] -type PatternMap = BTreeMap; +type PatternMap = BTreeMap; #[derive(Clone, Serialize, Deserialize, Debug)] /// Represents a Fact for a character in a sample data entity that has been analyzed -pub struct Fact{ - /// the char that the fact defines (.e.g: 'a', '1', '%', etc.) - pub key: char, - /// the char that appears before (-1) the key in the entity - pub prior_key: Option, - /// the char that appears after (+1) the key in the entity - pub next_key: Option, - /// the PatternPlaceholder symbol that represents the type of key - pub pattern_placeholder: char, - /// indicates if the key is the first char in the entity (0=no, 1=yes) - pub starts_with: u32, - /// indicates if the key is the last char in the entity (0=no, 1=yes) - pub ends_with: u32, - /// indicates the number of positions from the index zero (where the char is located in the entity from the first position) - pub index_offset: u32, +pub struct Fact { + /// the char that the fact defines (.e.g: 'a', '1', '%', etc.) + pub key: char, + /// the char that appears before (-1) the key in the entity + pub prior_key: Option, + /// the char that appears after (+1) the key in the entity + pub next_key: Option, + /// the PatternPlaceholder symbol that represents the type of key + pub pattern_placeholder: char, + /// indicates if the key is the first char in the entity (0=no, 1=yes) + pub starts_with: u32, + /// indicates if the key is the last char in the entity (0=no, 1=yes) + pub ends_with: u32, + /// indicates the number of positions from the index zero (where the char is located in the entity from the first position) + pub index_offset: u32, } impl Fact { - /// Constructs a new Fact - /// - /// # Arguments - /// - /// * `k: char` - The char that the Fact represents (also known as the `key`).
- /// * `pp: char` - The char that represents the patter placeholder for the key.
- /// * `sw: u32` - Indicates is the key is the first char in the entity. (0=no, 1=yes)
- /// * `ew: u32` - Indicates is the key is the last char in the entity. (0=no, 1=yes)
- /// * `idx_off: u32` - The index that represents the postion of the key from the beginning of the entity (zero based).
- /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::engine::Fact; - /// - /// fn main() { - /// //fact created for the character 'r' in the string "word" + /// Constructs a new Fact + /// + /// # Arguments + /// + /// * `k: char` - The char that the Fact represents (also known as the `key`).
+ /// * `pp: char` - The char that represents the patter placeholder for the key.
+ /// * `sw: u32` - Indicates is the key is the first char in the entity. (0=no, 1=yes)
+ /// * `ew: u32` - Indicates is the key is the last char in the entity. (0=no, 1=yes)
+ /// * `idx_off: u32` - The index that represents the postion of the key from the beginning of the entity (zero based).
+ /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::engine::Fact; + /// + /// fn main() { + /// //fact created for the character 'r' in the string "word" /// let mut fact = Fact::new('r','c',0,0,2); - /// } - /// ``` - pub fn new(k: char, pp: char, sw: u32, ew: u32, idx_off: u32 ) -> Fact { - Fact{ - key: k, - prior_key: None, - next_key: None, - pattern_placeholder: pp, - starts_with: sw, - ends_with: ew, - index_offset: idx_off, - } - } - - /// Constructs a new Fact from a serialized (JSON) string of the Fact object. This is used when restoring from "archive" - /// - /// # Arguments - /// - /// * `serialized: &str` - The JSON string that represents the archived Fact object.
- /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::engine::Fact; - /// - /// fn main() { - /// let serialized = "{\"key\":\"r\",\"prior_key\":null,\"next_key\":null,\"pattern_placeholder\":\"c\",\"starts_with\":0,\"ends_with\":0,\"index_offset\":2}"; + /// } + /// ``` + pub fn new(k: char, pp: char, sw: u32, ew: u32, idx_off: u32) -> Fact { + Fact { + key: k, + prior_key: None, + next_key: None, + pattern_placeholder: pp, + starts_with: sw, + ends_with: ew, + index_offset: idx_off, + } + } + + /// Constructs a new Fact from a serialized (JSON) string of the Fact object. This is used when restoring from "archive" + /// + /// # Arguments + /// + /// * `serialized: &str` - The JSON string that represents the archived Fact object.
+ /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::engine::Fact; + /// + /// fn main() { + /// let serialized = "{\"key\":\"r\",\"prior_key\":null,\"next_key\":null,\"pattern_placeholder\":\"c\",\"starts_with\":0,\"ends_with\":0,\"index_offset\":2}"; /// let mut fact = Fact::from_serialized(&serialized); /// fact.set_prior_key('a'); /// fact.set_next_key('e'); @@ -139,9 +139,9 @@ impl Fact { /// assert_eq!(fact.pattern_placeholder, 'c'); /// } /// ``` - pub fn from_serialized(serialized: &str) -> Fact { - serde_json::from_str(&serialized).unwrap() - } + pub fn from_serialized(serialized: &str) -> Fact { + serde_json::from_str(&serialized).unwrap() + } /// This function converts the Fact to a serialize JSON string. /// @@ -160,9 +160,9 @@ impl Fact { /// // {"key":"r","prior_key":null,"next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2} /// } /// - pub fn serialize(&mut self) ->String { - serde_json::to_string(&self).unwrap() - } + pub fn serialize(&mut self) -> String { + serde_json::to_string(&self).unwrap() + } /// This function sets the next key attribute to the specified char. /// @@ -183,9 +183,9 @@ impl Fact { /// fact.set_next_key('d'); /// } /// - pub fn set_next_key(&mut self, nk: char) { - self.next_key = Some(nk); - } + pub fn set_next_key(&mut self, nk: char) { + self.next_key = Some(nk); + } /// This function sets the prior key attribute to the specified char. /// @@ -206,27 +206,27 @@ impl Fact { /// fact.set_prior_key('o'); /// } /// - pub fn set_prior_key(&mut self, pk: char) { - self.prior_key = Some(pk); - } + pub fn set_prior_key(&mut self, pk: char) { + self.prior_key = Some(pk); + } } /// Represents a symbolic pattern of an entity (String) pub struct Pattern { - /// The regex rule used to find upper case consonants - regex_consonant_upper: Regex, - /// The regex rule used to find lower case consonants - regex_consonant_lower: Regex, - /// The regex rule used to find upper case vowels - regex_vowel_upper: Regex, - /// The regex rule used to find lower case vowels - regex_vowel_lower: Regex, - /// The regex rule used to find numeric digits - regex_numeric: Regex, - /// The regex rule used to find punctuation - regex_punctuation: Regex, - /// The regex rule used to find white spaces - regex_space: Regex, + /// The regex rule used to find upper case consonants + regex_consonant_upper: Regex, + /// The regex rule used to find lower case consonants + regex_consonant_lower: Regex, + /// The regex rule used to find upper case vowels + regex_vowel_upper: Regex, + /// The regex rule used to find lower case vowels + regex_vowel_lower: Regex, + /// The regex rule used to find numeric digits + regex_numeric: Regex, + /// The regex rule used to find punctuation + regex_punctuation: Regex, + /// The regex rule used to find white spaces + regex_space: Regex, } impl Default for Pattern { @@ -251,162 +251,170 @@ pub struct PatternDefinition { impl PatternDefinition { /// Constructs a new PatternDefinition - /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::engine::PatternDefinition; - /// - /// fn main() { - /// let pttrn_def = PatternDefinition::new(); - /// } - /// ``` - pub fn new() -> PatternDefinition { - let symbols: [char; 9] = ['@','C','c','V','v','#','~','S','p']; + /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::engine::PatternDefinition; + /// + /// fn main() { + /// let pttrn_def = PatternDefinition::new(); + /// } + /// ``` + pub fn new() -> PatternDefinition { + let symbols: [char; 9] = ['@', 'C', 'c', 'V', 'v', '#', '~', 'S', 'p']; let mut pttrn_def = PatternMap::new(); - pttrn_def.insert("Unknown".to_string(), symbols[0]); - pttrn_def.insert("ConsonantUpper".to_string(), symbols[1]); - pttrn_def.insert("ConsonantLower".to_string(), symbols[2]); - pttrn_def.insert("VowelUpper".to_string(), symbols[3]); - pttrn_def.insert("VowelLower".to_string(), symbols[4]); - pttrn_def.insert("Numeric".to_string(), symbols[5]); - pttrn_def.insert("RegExSpcChar".to_string(), symbols[6]); - pttrn_def.insert("WhiteSpace".to_string(), symbols[7]); - pttrn_def.insert("Punctuation".to_string(), symbols[8]); - - PatternDefinition{ + pttrn_def.insert("Unknown".to_string(), symbols[0]); + pttrn_def.insert("ConsonantUpper".to_string(), symbols[1]); + pttrn_def.insert("ConsonantLower".to_string(), symbols[2]); + pttrn_def.insert("VowelUpper".to_string(), symbols[3]); + pttrn_def.insert("VowelLower".to_string(), symbols[4]); + pttrn_def.insert("Numeric".to_string(), symbols[5]); + pttrn_def.insert("RegExSpcChar".to_string(), symbols[6]); + pttrn_def.insert("WhiteSpace".to_string(), symbols[7]); + pttrn_def.insert("Punctuation".to_string(), symbols[8]); + + PatternDefinition { pattern_map: pttrn_def, pattern: Pattern::default(), - } - } + } + } /// This function converts an entity (&str) into a tuplet (String, Vec)
- /// - /// # Arguments - /// - /// * `entity: String` - The textual str of the value to anaylze.
- /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::engine::PatternDefinition; - /// - /// fn main() { - /// let mut pttrn_def = PatternDefinition::new(); + /// + /// # Arguments + /// + /// * `entity: String` - The textual str of the value to anaylze.
+ /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::engine::PatternDefinition; + /// + /// fn main() { + /// let mut pttrn_def = PatternDefinition::new(); /// //async { /// let rslt = pttrn_def.analyze("Hello World"); /// assert_eq!(rslt.0, "CvccvSCvccc"); /// //} - /// } - /// ``` - pub fn analyze(&mut self, entity: &str) -> (String, Vec) { - // record the length of the passed value - //self.size = entity.len() as u32; - - // String to hold the pattern - let mut pttrn = String::new(); - - // Vec to hold all the Facts to be returned - let mut facts = Vec::new(); - - // record the pattern of the passed value - for (i, _c) in entity.chars().enumerate() { - //let fact = self.factualize(&entity, i as u32); - let idx: u32 = i as u32; - let fact = self.factualize(entity, idx); - pttrn.push_str(&*fact.pattern_placeholder.to_string()); - facts.push(fact); - } - - (pttrn, facts) - } + /// } + /// ``` + pub fn analyze(&mut self, entity: &str) -> (String, Vec) { + // record the length of the passed value + //self.size = entity.len() as u32; + + // String to hold the pattern + let mut pttrn = String::new(); + + // Vec to hold all the Facts to be returned + let mut facts = Vec::new(); + + // record the pattern of the passed value + for (i, _c) in entity.chars().enumerate() { + //let fact = self.factualize(&entity, i as u32); + let idx: u32 = i as u32; + let fact = self.factualize(entity, idx); + pttrn.push_str(&*fact.pattern_placeholder.to_string()); + facts.push(fact); + } + + (pttrn, facts) + } /// This function converts a char in an entity (&str) based on the index specified into a Fact
- /// - /// # Arguments - /// - /// * `entity: String` - The textual str of the value to anaylze.
- /// * `idx: u32` - The index that specifies the position of the char in the entity to convert to a Fact.
- /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::engine::PatternDefinition; - /// - /// fn main() { - /// let mut pttrn_def = PatternDefinition::new(); - /// let fact = pttrn_def.factualize("Word",0); - /// // will return a Fact that represents the char `W` - /// } - /// ``` - pub fn factualize(&mut self, entity: &str, idx: u32) -> Fact { - let c = entity.chars().nth(idx as usize).unwrap(); - let pp = self.symbolize_char(c); - let pk = if idx > 0 {entity.chars().nth(idx as usize -1)} else {None}; - let nk = if idx < entity.len() as u32 -1 {entity.chars().nth(idx as usize +1)} else {None}; - let sw = if idx == 0 {1} else {0}; - let ew = if idx == entity.len() as u32 -1 {1} else {0}; - - let mut fact = Fact::new(c,pp,sw,ew,idx); - - // only if there is a next key - if nk.is_some() { - &fact.set_next_key(nk.unwrap()); - } - - // only if there is a prior key - if pk.is_some() { - &fact.set_prior_key(pk.unwrap()); - } - - fact - } - - /// This function returns a pattern symbol that represents the type of character - /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::engine::PatternDefinition; - /// - /// fn main() { - /// let pttrn_def = PatternDefinition::new(); - /// println!("Upper case vowel symbol: {:?}", pttrn_def.get(&"VowelUpper".to_string())); - /// } - /// ``` - pub fn get(&self, key: &str) -> char { - *self.pattern_map.get(key).unwrap() + /// + /// # Arguments + /// + /// * `entity: String` - The textual str of the value to anaylze.
+ /// * `idx: u32` - The index that specifies the position of the char in the entity to convert to a Fact.
+ /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::engine::PatternDefinition; + /// + /// fn main() { + /// let mut pttrn_def = PatternDefinition::new(); + /// let fact = pttrn_def.factualize("Word",0); + /// // will return a Fact that represents the char `W` + /// } + /// ``` + pub fn factualize(&mut self, entity: &str, idx: u32) -> Fact { + let c = entity.chars().nth(idx as usize).unwrap(); + let pp = self.symbolize_char(c); + let pk = if idx > 0 { + entity.chars().nth(idx as usize - 1) + } else { + None + }; + let nk = if idx < entity.len() as u32 - 1 { + entity.chars().nth(idx as usize + 1) + } else { + None + }; + let sw = if idx == 0 { 1 } else { 0 }; + let ew = if idx == entity.len() as u32 - 1 { 1 } else { 0 }; + + let mut fact = Fact::new(c, pp, sw, ew, idx); + + // only if there is a next key + if nk.is_some() { + &fact.set_next_key(nk.unwrap()); + } + + // only if there is a prior key + if pk.is_some() { + &fact.set_prior_key(pk.unwrap()); + } + + fact } - + + /// This function returns a pattern symbol that represents the type of character + /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::engine::PatternDefinition; + /// + /// fn main() { + /// let pttrn_def = PatternDefinition::new(); + /// println!("Upper case vowel symbol: {:?}", pttrn_def.get(&"VowelUpper".to_string())); + /// } + /// ``` + pub fn get(&self, key: &str) -> char { + *self.pattern_map.get(key).unwrap() + } + /// This function converts a char into a pattern symbol - /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::engine::PatternDefinition; - /// - /// fn main() { - /// let pttrn_def = PatternDefinition::new(); - /// println!("The pattern symbol for 'A' is {:?}", pttrn_def.symbolize_char('A')); - /// // The pattern symbol for 'A' is V - /// } - /// ``` + /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::engine::PatternDefinition; + /// + /// fn main() { + /// let pttrn_def = PatternDefinition::new(); + /// println!("The pattern symbol for 'A' is {:?}", pttrn_def.symbolize_char('A')); + /// // The pattern symbol for 'A' is V + /// } + /// ``` pub fn symbolize_char(&self, c: char) -> char { // if you have to escape regex special characters: &*regex::escape(&*$c.to_string()) - let mut symbol = self.pattern_map.get("Unknown"); + let mut symbol = self.pattern_map.get("Unknown"); let mut found = false; - + if !found && self.pattern.regex_consonant_upper.is_match(&c.to_string()) { symbol = self.pattern_map.get("ConsonantUpper"); found = true; @@ -453,62 +461,64 @@ impl PatternDefinition { pub trait Engine { fn analyze_entities(entities: Vec) -> Vec<(String, Vec)> { - let (tx, rx): (Sender<(String, Vec)>, Receiver<(String, Vec)>) = mpsc::channel(); + let (tx, rx): (Sender<(String, Vec)>, Receiver<(String, Vec)>) = + mpsc::channel(); let mut children = Vec::new(); - + for entity in entities.clone() { let thread_tx = tx.clone(); let child = thread::spawn(move || { - thread_tx.send(PatternDefinition::new().analyze(&entity)).unwrap(); + thread_tx + .send(PatternDefinition::new().analyze(&entity)) + .unwrap(); debug!("PatternDefinition::analyze thread finished for {}", entity); }); - + children.push(child); } - + let mut results = Vec::new(); for entity in entities { - results.push( - match rx.recv() { - Ok(result) => result, - Err(_) => { - error!("Error: Could not anaylze the entity: {}", entity); - panic!("Error: Could not anaylze the data!") - } + results.push(match rx.recv() { + Ok(result) => result, + Err(_) => { + error!("Error: Could not anaylze the entity: {}", entity); + panic!("Error: Could not anaylze the data!") } - ); + }); } - + for child in children { child.join().expect("Error: Could not anaylze the data!"); } results - } - - fn profile_entities(mut profile: Profile, entities: Vec) -> Result { - let results = Self::analyze_entities(entities); - - for result in results { - match profile.apply_facts(result.0, result.1) { - Ok(_) => {}, - Err(e) => { - return Err(format!("Error: Couldn't apply the Pattern and Facts to the Profile. Error Message: {}", e.to_string())) - } - } - } - - Ok(profile) - } - - fn profile_entities_with_container(container: EngineContainer) -> Result { - Self::profile_entities(container.profile, container.entities) - } + } + + fn profile_entities(mut profile: Profile, entities: Vec) -> Result { + let results = Self::analyze_entities(entities); + + for result in results { + match profile.apply_facts(result.0, result.1) { + Ok(_) => {} + Err(e) => return Err(format!( + "Error: Couldn't apply the Pattern and Facts to the Profile. Error Message: {}", + e.to_string() + )), + } + } + + Ok(profile) + } + + fn profile_entities_with_container(container: EngineContainer) -> Result { + Self::profile_entities(container.profile, container.entities) + } } pub struct EngineContainer { - pub profile: Profile, - pub entities: Vec, + pub profile: Profile, + pub entities: Vec, } // Unit Tests @@ -516,45 +526,45 @@ pub struct EngineContainer { mod tests { use super::*; - struct Xtest{} - impl Engine for Xtest{} + struct Xtest {} + impl Engine for Xtest {} #[test] - fn test_fact_new(){ + fn test_fact_new() { //fact created for the character 'r' in the string "word" - let _fact = Fact::new('r','c',0,0,2); - - assert!(true); + let _fact = Fact::new('r', 'c', 0, 0, 2); + + assert!(true); } - + #[test] - fn test_fact_new_from_serialized(){ - let serialized = "{\"key\":\"r\",\"prior_key\":null,\"next_key\":null,\"pattern_placeholder\":\"c\",\"starts_with\":0,\"ends_with\":0,\"index_offset\":2}"; - let fact = Fact::from_serialized(&serialized); - assert_eq!(fact.pattern_placeholder, 'c'); + fn test_fact_new_from_serialized() { + let serialized = "{\"key\":\"r\",\"prior_key\":null,\"next_key\":null,\"pattern_placeholder\":\"c\",\"starts_with\":0,\"ends_with\":0,\"index_offset\":2}"; + let fact = Fact::from_serialized(&serialized); + assert_eq!(fact.pattern_placeholder, 'c'); } - + #[test] - fn test_fact_serialize(){ + fn test_fact_serialize() { //fact created for the character 'r' in the string "word" - let mut fact = Fact::new('r','c',0,0,2); - let serialized = fact.serialize(); - - assert_eq!(serialized,"{\"key\":\"r\",\"prior_key\":null,\"next_key\":null,\"pattern_placeholder\":\"c\",\"starts_with\":0,\"ends_with\":0,\"index_offset\":2}"); + let mut fact = Fact::new('r', 'c', 0, 0, 2); + let serialized = fact.serialize(); + + assert_eq!(serialized,"{\"key\":\"r\",\"prior_key\":null,\"next_key\":null,\"pattern_placeholder\":\"c\",\"starts_with\":0,\"ends_with\":0,\"index_offset\":2}"); } - + #[test] - fn test_fact_set_next_key(){ + fn test_fact_set_next_key() { //fact created for the character 'r' in the string "word" - let mut fact = Fact::new('r','c',0,0,2); - fact.set_next_key('d'); + let mut fact = Fact::new('r', 'c', 0, 0, 2); + fact.set_next_key('d'); } - + #[test] - fn test_fact_set_prior_key(){ + fn test_fact_set_prior_key() { //fact created for the character 'r' in the string "word" - let mut fact = Fact::new('r','c',0,0,2); - fact.set_prior_key('o'); + let mut fact = Fact::new('r', 'c', 0, 0, 2); + fact.set_prior_key('o'); } #[test] @@ -564,49 +574,61 @@ mod tests { } #[test] - fn test_pattern_definition_symbolize_char(){ - let pttrn_def = PatternDefinition::new(); + fn test_pattern_definition_symbolize_char() { + let pttrn_def = PatternDefinition::new(); - assert_eq!(pttrn_def.symbolize_char('A'), 'V'); + assert_eq!(pttrn_def.symbolize_char('A'), 'V'); } #[test] - fn test_pattern_definition_factualize(){ - let mut pttrn_def = PatternDefinition::new(); - let mut fact1 = pttrn_def.factualize("Word",1); - let mut fact2 = Fact::new('o','v',0,0,1); - fact2.set_prior_key('W'); - fact2.set_next_key('r'); - - assert_eq!(fact1.serialize(), fact2.serialize()); + fn test_pattern_definition_factualize() { + let mut pttrn_def = PatternDefinition::new(); + let mut fact1 = pttrn_def.factualize("Word", 1); + let mut fact2 = Fact::new('o', 'v', 0, 0, 1); + fact2.set_prior_key('W'); + fact2.set_next_key('r'); + + assert_eq!(fact1.serialize(), fact2.serialize()); } #[test] - fn test_pattern_definition_analyze(){ - let mut pttrn_def = PatternDefinition::new(); + fn test_pattern_definition_analyze() { + let mut pttrn_def = PatternDefinition::new(); let word = pttrn_def.analyze("HELlo0?^@"); - + assert_eq!(word.0, "CVCcv#pp@"); assert_eq!(word.1.len(), 9); } #[test] - fn test_pattern_definition_analyze_multithread(){ - let words = vec!("word-one".to_string(),"word-two".to_string(),"word-three".to_string(),"word-four".to_string(),"word-five".to_string()); + fn test_pattern_definition_analyze_multithread() { + let words = vec![ + "word-one".to_string(), + "word-two".to_string(), + "word-three".to_string(), + "word-four".to_string(), + "word-five".to_string(), + ]; let results = Xtest::analyze_entities(words); println!("{:?}", results); assert_eq!(results.len(), 5); - } - - #[test] - fn test_profile_entities() { - //async { - let profile = Profile::new(); - let words = vec!("word-one".to_string(),"word-two".to_string(),"word-three".to_string(),"word-four".to_string(),"word-five".to_string()); - let result = Xtest::profile_entities(profile, words); - assert!(result.is_ok()); - //}; - } -} \ No newline at end of file + } + + #[test] + fn test_profile_entities() { + //async { + let profile = Profile::new(); + let words = vec![ + "word-one".to_string(), + "word-two".to_string(), + "word-three".to_string(), + "word-four".to_string(), + "word-five".to_string(), + ]; + let result = Xtest::profile_entities(profile, words); + assert!(result.is_ok()); + //}; + } +} diff --git a/src/lib.rs b/src/lib.rs index 82cc077..d8a7916 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -124,7 +124,7 @@ //! dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")).unwrap(); //! } //! ``` -#![crate_type= "lib"] +#![crate_type = "lib"] #![crate_name = "test_data_generation"] #[macro_use] @@ -132,248 +132,255 @@ extern crate log; #[macro_use] extern crate serde_derive; +extern crate crossbeam; +extern crate csv; +extern crate indexmap; +extern crate levenshtein; +extern crate rand; +extern crate regex; extern crate serde; extern crate serde_json; extern crate serde_yaml; extern crate yaml_rust; -extern crate regex; -extern crate rand; -extern crate crossbeam; -extern crate csv; -extern crate levenshtein; -extern crate indexmap; use crate::engine::{Fact, PatternDefinition}; use std::collections::BTreeMap; -use std::ops::AddAssign; use std::fs::File; use std::io; -use std::io::Write; use std::io::prelude::*; +use std::io::Write; +use std::ops::AddAssign; type PatternMap = BTreeMap; type SizeMap = BTreeMap; -type SizeRankMap = BTreeMap; +type SizeRankMap = BTreeMap; #[derive(Clone, Serialize, Deserialize, Debug)] /// Represents a Profile for sample data that has been analyzed and can be used to generate realistic data -pub struct Profile { - /// An identifier (not necessarily unique) that is used to differentiate profiles from one another - pub id: Option, +pub struct Profile { + /// An identifier (not necessarily unique) that is used to differentiate profiles from one another + pub id: Option, /// A list of symbolic patterns with a distinct count of occurrences - pub patterns: PatternMap, - /// The total number of patterns in the profile - pub pattern_total: u32, - /// A list of symbolic patterns in the profile - /// (used for temporary storage due to lifetime issues) - pub pattern_keys: Vec, - /// A list of distinct counts for patterns in the profile - /// (used for temporary storage due to lifetime issues) - pub pattern_vals: Vec, - /// A list of symbolic patterns with their percent chance of occurrence - pub pattern_percentages: Vec<(String, f64)>, - /// A list of symbolic patterns with a running total of percent chance of occurrence, in increasing order - pub pattern_ranks: Vec<(String, f64)>, - /// A list of pattern lengths with a distinct count of occurrence - pub sizes: SizeMap, - /// the total number of pattern sizes (lengths) in the profile - pub size_total: u32, - /// A list of pattern sizes (lengths) with a running total of their percent chance of occurrence, in increasing order - pub size_ranks: Vec<(u32, f64)>, - /// The number of processors used to distribute the work load (multi-thread) while finding Facts to generate data - pub processors: u8, - /// A list of processors (which are lists of Facts) that store all the Facts in the profile - pub facts: Vec>, + pub patterns: PatternMap, + /// The total number of patterns in the profile + pub pattern_total: u32, + /// A list of symbolic patterns in the profile + /// (used for temporary storage due to lifetime issues) + pub pattern_keys: Vec, + /// A list of distinct counts for patterns in the profile + /// (used for temporary storage due to lifetime issues) + pub pattern_vals: Vec, + /// A list of symbolic patterns with their percent chance of occurrence + pub pattern_percentages: Vec<(String, f64)>, + /// A list of symbolic patterns with a running total of percent chance of occurrence, in increasing order + pub pattern_ranks: Vec<(String, f64)>, + /// A list of pattern lengths with a distinct count of occurrence + pub sizes: SizeMap, + /// the total number of pattern sizes (lengths) in the profile + pub size_total: u32, + /// A list of pattern sizes (lengths) with a running total of their percent chance of occurrence, in increasing order + pub size_ranks: Vec<(u32, f64)>, + /// The number of processors used to distribute the work load (multi-thread) while finding Facts to generate data + pub processors: u8, + /// A list of processors (which are lists of Facts) that store all the Facts in the profile + pub facts: Vec>, } impl Profile { - /// Constructs a new Profile - /// - /// #Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { - /// let placeholder = Profile::new(); - /// } - /// ``` - pub fn new() -> Profile { - Profile { - id: None, - patterns: PatternMap::new(), - pattern_total: 0, - pattern_keys: Vec::new(), - pattern_vals: Vec::new(), - pattern_percentages: Vec::new(), - pattern_ranks: Vec::new(), - sizes: SizeMap::new(), - size_total: 0, - size_ranks: Vec::new(), - processors: 4, - facts: Profile::new_facts(4), - } - } - - /// Constructs a new Profile using an identifier - /// - /// #Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { - /// let placeholder = Profile::new_with_id("12345".to_string()); - /// } - /// ``` - pub fn new_with_id(id: String) -> Profile { - Profile { - id: Some(id), - patterns: PatternMap::new(), - pattern_total: 0, - pattern_keys: Vec::new(), - pattern_vals: Vec::new(), - pattern_percentages: Vec::new(), - pattern_ranks: Vec::new(), - sizes: SizeMap::new(), - size_total: 0, - size_ranks: Vec::new(), - processors: 4, - facts: Profile::new_facts(4), - } - } - - /// Constructs a new Profile with a specified number of processors to analyze the data. - /// Each processor shares the load of generating the data based on the Facts it has been assigned to manage. - /// - /// # Arguments - /// - /// * `p: u8` - A number that sets the number of processors to start up to manage the Facts.
- /// Increasing the number of processors will speed up the generator be distributing the workload. - /// The recommended number of processors is 1 per 10K data points (e.g.: profiling 20K names should be handled by 2 processors)
- /// NOTE: The default number of processors is 4. - /// - /// #Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { - /// let processors: u8 = 10; - /// let placeholder = Profile::new_with_processors(processors); - /// } - /// ``` - pub fn new_with_processors(p: u8) -> Profile { - Profile { - id: None, - patterns: PatternMap::new(), - pattern_total: 0, - pattern_keys: Vec::new(), - pattern_vals: Vec::new(), - pattern_percentages: Vec::new(), - pattern_ranks: Vec::new(), - sizes: SizeMap::new(), - size_total: 0, - size_ranks: Vec::new(), - processors: p, - facts: Profile::new_facts(p), - } - } - - /// Constructs a new Profile from an exported JSON file. This is used when restoring from "archive" - /// - /// # Arguments - /// - /// * `field: String` - The full path of the export file , excluding the file extension, (e.g.: "./test/data/custom-names").
- /// - /// #Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { - /// let mut profile = Profile::from_file("./tests/samples/sample-00-profile"); + /// Constructs a new Profile + /// + /// #Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { + /// let placeholder = Profile::new(); + /// } + /// ``` + pub fn new() -> Profile { + Profile { + id: None, + patterns: PatternMap::new(), + pattern_total: 0, + pattern_keys: Vec::new(), + pattern_vals: Vec::new(), + pattern_percentages: Vec::new(), + pattern_ranks: Vec::new(), + sizes: SizeMap::new(), + size_total: 0, + size_ranks: Vec::new(), + processors: 4, + facts: Profile::new_facts(4), + } + } + + /// Constructs a new Profile using an identifier + /// + /// #Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { + /// let placeholder = Profile::new_with_id("12345".to_string()); + /// } + /// ``` + pub fn new_with_id(id: String) -> Profile { + Profile { + id: Some(id), + patterns: PatternMap::new(), + pattern_total: 0, + pattern_keys: Vec::new(), + pattern_vals: Vec::new(), + pattern_percentages: Vec::new(), + pattern_ranks: Vec::new(), + sizes: SizeMap::new(), + size_total: 0, + size_ranks: Vec::new(), + processors: 4, + facts: Profile::new_facts(4), + } + } + + /// Constructs a new Profile with a specified number of processors to analyze the data. + /// Each processor shares the load of generating the data based on the Facts it has been assigned to manage. + /// + /// # Arguments + /// + /// * `p: u8` - A number that sets the number of processors to start up to manage the Facts.
+ /// Increasing the number of processors will speed up the generator be distributing the workload. + /// The recommended number of processors is 1 per 10K data points (e.g.: profiling 20K names should be handled by 2 processors)
+ /// NOTE: The default number of processors is 4. + /// + /// #Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { + /// let processors: u8 = 10; + /// let placeholder = Profile::new_with_processors(processors); + /// } + /// ``` + pub fn new_with_processors(p: u8) -> Profile { + Profile { + id: None, + patterns: PatternMap::new(), + pattern_total: 0, + pattern_keys: Vec::new(), + pattern_vals: Vec::new(), + pattern_percentages: Vec::new(), + pattern_ranks: Vec::new(), + sizes: SizeMap::new(), + size_total: 0, + size_ranks: Vec::new(), + processors: p, + facts: Profile::new_facts(p), + } + } + + /// Constructs a new Profile from an exported JSON file. This is used when restoring from "archive" + /// + /// # Arguments + /// + /// * `field: String` - The full path of the export file , excluding the file extension, (e.g.: "./test/data/custom-names").
+ /// + /// #Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { + /// let mut profile = Profile::from_file("./tests/samples/sample-00-profile"); /// /// profile.pre_generate(); /// /// println!("The generated name is {:?}", profile.generate()); - /// } + /// } /// ``` - pub fn from_file(path: &'static str) -> Profile { - // open the archive file - let mut file = match File::open(format!("{}.json",&path)) { - Err(_e) => { - error!("Could not open file {:?}", &path.to_string()); - panic!("Could not open file {:?}", &path.to_string()); - }, - Ok(f) => { - info!("Successfully opened file {:?}", &path.to_string()); - f - }, - }; - - //read the archive file - let mut serialized = String::new(); - match file.read_to_string(&mut serialized) { - Err(e) => { - error!("Could not read file {:?} because of {:?}", &path.to_string(), e.to_string()); - panic!("Could not read file {:?} because of {:?}", &path.to_string(), e.to_string()); - }, - Ok(s) => { - info!("Successfully read file {:?}", &path.to_string()); - s - }, - }; + pub fn from_file(path: &'static str) -> Profile { + // open the archive file + let mut file = match File::open(format!("{}.json", &path)) { + Err(_e) => { + error!("Could not open file {:?}", &path.to_string()); + panic!("Could not open file {:?}", &path.to_string()); + } + Ok(f) => { + info!("Successfully opened file {:?}", &path.to_string()); + f + } + }; + + //read the archive file + let mut serialized = String::new(); + match file.read_to_string(&mut serialized) { + Err(e) => { + error!( + "Could not read file {:?} because of {:?}", + &path.to_string(), + e.to_string() + ); + panic!( + "Could not read file {:?} because of {:?}", + &path.to_string(), + e.to_string() + ); + } + Ok(s) => { + info!("Successfully read file {:?}", &path.to_string()); + s + } + }; //serde_json::from_str(&serialized).unwrap() Self::from_serialized(&serialized) - } - - - /// Constructs a new Profile from a serialized (JSON) string of the Profile object. This is used when restoring from "archive" - /// - /// #Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { - /// let serialized = "{\"patterns\":{\"VC\":1},\"pattern_total\":1,\"pattern_keys\":[\"VC\"],\"pattern_vals\":[1],\"pattern_percentages\":[],\"pattern_ranks\":[],\"sizes\":{\"2\":1},\"size_total\":1,\"size_ranks\":[],\"processors\":4,\"facts\":[[{\"key\":\"O\",\"prior_key\":null,\"next_key\":\"K\",\"pattern_placeholder\":\"V\",\"starts_with\":1,\"ends_with\":0,\"index_offset\":0}],[{\"key\":\"K\",\"prior_key\":\"O\",\"next_key\":null,\"pattern_placeholder\":\"C\",\"starts_with\":0,\"ends_with\":1,\"index_offset\":1}],[],[]]}"; + } + + /// Constructs a new Profile from a serialized (JSON) string of the Profile object. This is used when restoring from "archive" + /// + /// #Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { + /// let serialized = "{\"patterns\":{\"VC\":1},\"pattern_total\":1,\"pattern_keys\":[\"VC\"],\"pattern_vals\":[1],\"pattern_percentages\":[],\"pattern_ranks\":[],\"sizes\":{\"2\":1},\"size_total\":1,\"size_ranks\":[],\"processors\":4,\"facts\":[[{\"key\":\"O\",\"prior_key\":null,\"next_key\":\"K\",\"pattern_placeholder\":\"V\",\"starts_with\":1,\"ends_with\":0,\"index_offset\":0}],[{\"key\":\"K\",\"prior_key\":\"O\",\"next_key\":null,\"pattern_placeholder\":\"C\",\"starts_with\":0,\"ends_with\":1,\"index_offset\":1}],[],[]]}"; /// let mut profile = Profile::from_serialized(&serialized); /// /// profile.pre_generate(); /// /// println!("The generated name is {:?}", profile.generate()); - /// } + /// } /// ``` - pub fn from_serialized(serialized: &str) -> Profile { - serde_json::from_str(&serialized).unwrap() - } - - /// This function converts an data point (&str) to a pattern and adds it to the profile - /// - /// # Arguments - /// - /// * `entity: String` - The textual str of the value to anaylze.
- /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { + pub fn from_serialized(serialized: &str) -> Profile { + serde_json::from_str(&serialized).unwrap() + } + + /// This function converts an data point (&str) to a pattern and adds it to the profile + /// + /// # Arguments + /// + /// * `entity: String` - The textual str of the value to anaylze.
+ /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { /// let mut profile = Profile::new(); /// profile.analyze("One"); /// profile.analyze("Two"); @@ -381,76 +388,79 @@ impl Profile { /// profile.analyze("Four"); /// /// assert_eq!(profile.patterns.len(), 4); - /// } - /// ``` - pub fn analyze(&mut self, entity: &str) { - let rslt = PatternDefinition::new().analyze(entity); - let _t = self.apply_facts(rslt.0, rslt.1).map_err(|e| { - error!("Warning: Couldn't apply the pattern and facts for the entity {}!", entity); - e.to_string() - }); - } - - /// This function applies the pattern and list of Facts to the profile - /// - /// # Arguments - /// - /// * `pattern: String` - The string the represents the pattern of the entity that was analyzed.
- /// * `facts: Vec` - A Vector containing the Facts based on the analysis (one for each char in the entity).
- /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// + /// } + /// ``` + pub fn analyze(&mut self, entity: &str) { + let rslt = PatternDefinition::new().analyze(entity); + let _t = self.apply_facts(rslt.0, rslt.1).map_err(|e| { + error!( + "Warning: Couldn't apply the pattern and facts for the entity {}!", + entity + ); + e.to_string() + }); + } + + /// This function applies the pattern and list of Facts to the profile + /// + /// # Arguments + /// + /// * `pattern: String` - The string the represents the pattern of the entity that was analyzed.
+ /// * `facts: Vec` - A Vector containing the Facts based on the analysis (one for each char in the entity).
+ /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// /// use test_data_generation::engine::{Fact, PatternDefinition}; - /// use test_data_generation::Profile; - /// - /// fn main() { - /// let mut profile = Profile::new(); - /// let results = PatternDefinition::new().analyze("Word"); - /// - /// assert_eq!(profile.apply_facts(results.0, results.1).unwrap(), 1); - /// } - /// ``` - pub fn apply_facts(&mut self, pattern: String, facts: Vec) -> Result{ - // balance the storing of facts across all the vectors that can be processed in parallel - let mut i = 0; - for f in facts.into_iter() { - if i == self.processors { - i = 0; - } - - self.facts[i as usize].push(f); - i = i + 1; - } - - // store the pattern - AddAssign::add_assign(self.patterns.entry(pattern.to_string()).or_insert(0), 1); - - // store the total number of patterns generated so far - self.pattern_total = self.patterns.values().sum::(); - - // analyze sizes - AddAssign::add_assign(self.sizes.entry(pattern.len() as u32).or_insert(0), 1); - self.size_total = self.sizes.values().sum::(); - - self.pattern_keys = self.patterns.keys().cloned().collect(); - self.pattern_vals = self.patterns.values().cloned().collect(); - - Ok(1) - } - - /// This function calculates the patterns to use by the chance they will occur (as cumulative percentage) in decreasing order - /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { + /// use test_data_generation::Profile; + /// + /// fn main() { + /// let mut profile = Profile::new(); + /// let results = PatternDefinition::new().analyze("Word"); + /// + /// assert_eq!(profile.apply_facts(results.0, results.1).unwrap(), 1); + /// } + /// ``` + pub fn apply_facts(&mut self, pattern: String, facts: Vec) -> Result { + // balance the storing of facts across all the vectors that can be processed in parallel + let mut i = 0; + for f in facts.into_iter() { + if i == self.processors { + i = 0; + } + + self.facts[i as usize].push(f); + i = i + 1; + } + + // store the pattern + AddAssign::add_assign(self.patterns.entry(pattern.to_string()).or_insert(0), 1); + + // store the total number of patterns generated so far + self.pattern_total = self.patterns.values().sum::(); + + // analyze sizes + AddAssign::add_assign(self.sizes.entry(pattern.len() as u32).or_insert(0), 1); + self.size_total = self.sizes.values().sum::(); + + self.pattern_keys = self.patterns.keys().cloned().collect(); + self.pattern_vals = self.patterns.values().cloned().collect(); + + Ok(1) + } + + /// This function calculates the patterns to use by the chance they will occur (as cumulative percentage) in decreasing order + /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { /// let mut profile = Profile::new(); /// /// profile.analyze("Smith, John"); @@ -466,51 +476,55 @@ impl Profile { /// /// assert_eq!(profile.pattern_ranks, test); /// } - /// ``` - pub fn cum_patternmap(&mut self) { - // Reference: https://users.rust-lang.org/t/cannot-infer-an-appropriate-lifetime-for-autoref/13360/3 - - debug!("calucating the cumulative percentage of occurences for data point patterns..."); - - // calculate the percentage by patterns - // -> {"CcvccpSCvcc": 14.285714285714285, "CvccvccpSCvccvc": 14.285714285714285, "CvccvccpSCvccvv": 28.57142857142857, "CvcvcccpSCcvcv": 14.285714285714285, "CvcvpSCvccc": 14.285714285714285, "V~CcvvcpSCvccc": 14.285714285714285} - let n = self.patterns.len(); - - // see issue: https://github.com/dsietz/test-data-generation/issues/88 - self.pattern_percentages.clear(); - - for m in 0..n { - self.pattern_percentages.push((self.pattern_keys[m].clone(), (self.pattern_vals[m] as f64 / self.pattern_total as f64) * 100.0)); - } - - // sort the ranks by percentages in decreasing order - // -> [("CvccvccpSCvccvv", 28.57142857142857), ("CcvccpSCvcc", 14.285714285714285), ("CvccvccpSCvccvc", 14.285714285714285), ("CvcvcccpSCcvcv", 14.285714285714285), ("CvcvpSCvccc", 14.285714285714285), ("V~CcvvcpSCvccc", 14.285714285714285)] - self.pattern_percentages.sort_by(|&(_, a), &(_, b)| b.partial_cmp(&a).unwrap()); - - // calculate the cumulative sum of the pattern rankings - // -> [("CvccvccpSCvccvv", 28.57142857142857), ("CcvccpSCvcc", 42.857142857142854), ("CvccvccpSCvccvc", 57.14285714285714), ("CvcvcccpSCcvcv", 71.42857142857142), ("CvcvpSCvccc", 85.7142857142857), ("V~CcvvcpSCvccc", 99.99999999999997)] - let mut rank: f64 = 0.00; - - // see issue: https://github.com/dsietz/test-data-generation/issues/88 - self.pattern_ranks.clear(); - - for pttrn in self.pattern_percentages.iter() { - let tmp = pttrn.1 + rank; - self.pattern_ranks.push((pttrn.0.clone(),tmp)); - rank = tmp; - } - } + /// ``` + pub fn cum_patternmap(&mut self) { + // Reference: https://users.rust-lang.org/t/cannot-infer-an-appropriate-lifetime-for-autoref/13360/3 + + debug!("calucating the cumulative percentage of occurences for data point patterns..."); + + // calculate the percentage by patterns + // -> {"CcvccpSCvcc": 14.285714285714285, "CvccvccpSCvccvc": 14.285714285714285, "CvccvccpSCvccvv": 28.57142857142857, "CvcvcccpSCcvcv": 14.285714285714285, "CvcvpSCvccc": 14.285714285714285, "V~CcvvcpSCvccc": 14.285714285714285} + let n = self.patterns.len(); + + // see issue: https://github.com/dsietz/test-data-generation/issues/88 + self.pattern_percentages.clear(); + + for m in 0..n { + self.pattern_percentages.push(( + self.pattern_keys[m].clone(), + (self.pattern_vals[m] as f64 / self.pattern_total as f64) * 100.0, + )); + } + + // sort the ranks by percentages in decreasing order + // -> [("CvccvccpSCvccvv", 28.57142857142857), ("CcvccpSCvcc", 14.285714285714285), ("CvccvccpSCvccvc", 14.285714285714285), ("CvcvcccpSCcvcv", 14.285714285714285), ("CvcvpSCvccc", 14.285714285714285), ("V~CcvvcpSCvccc", 14.285714285714285)] + self.pattern_percentages + .sort_by(|&(_, a), &(_, b)| b.partial_cmp(&a).unwrap()); + + // calculate the cumulative sum of the pattern rankings + // -> [("CvccvccpSCvccvv", 28.57142857142857), ("CcvccpSCvcc", 42.857142857142854), ("CvccvccpSCvccvc", 57.14285714285714), ("CvcvcccpSCcvcv", 71.42857142857142), ("CvcvpSCvccc", 85.7142857142857), ("V~CcvvcpSCvccc", 99.99999999999997)] + let mut rank: f64 = 0.00; + + // see issue: https://github.com/dsietz/test-data-generation/issues/88 + self.pattern_ranks.clear(); + + for pttrn in self.pattern_percentages.iter() { + let tmp = pttrn.1 + rank; + self.pattern_ranks.push((pttrn.0.clone(), tmp)); + rank = tmp; + } + } /// This function calculates the sizes to use by the chance they will occur (as cumulative percentage) in decreasing order - /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { + /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { /// let mut profile = Profile::new(); /// profile.analyze("One"); /// profile.analyze("Two"); @@ -524,40 +538,46 @@ impl Profile { /// print!("The size ranks are {:?}", profile.size_ranks); /// // The size ranks are [(3, 50), (4, 83.33333333333333), (5, 100)] /// } - /// ``` - pub fn cum_sizemap(&mut self) { - debug!("calucating the cumulative percentage of occurences for data point sizes..."); - // calculate the percentage by sizes - // -> {11: 28.57142857142857, 14: 14.285714285714285, 15: 57.14285714285714} - let mut size_ranks = SizeRankMap::new(); - - for key in self.sizes.keys(){ - size_ranks.insert(*key, (*self.sizes.get(key).unwrap() as f64 / self.size_total as f64)*100.0); - } - - // sort the ranks by percentages in decreasing order - // -> [(15, 57.14285714285714), (11, 28.57142857142857), (14, 14.285714285714285)] - let mut sizes = size_ranks.iter().collect::>(); - sizes.sort_by(|&(_, a), &(_, b)| b.partial_cmp(&a).unwrap()); - - // calculate the cumulative sum of the size rankings - // -> [(15, 57.14285714285714), (11, 85.71428571428571), (14, 100)] - self.size_ranks = sizes.iter().scan((0 as u32, 0.00 as f64), |state, &(&k, &v)| { - *state = (k, state.1 + &v); - Some(*state) - }).collect::>(); - } - - /// This function generates realistic test data based on the sampel data that was analyzed. - /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { + /// ``` + pub fn cum_sizemap(&mut self) { + debug!("calucating the cumulative percentage of occurences for data point sizes..."); + // calculate the percentage by sizes + // -> {11: 28.57142857142857, 14: 14.285714285714285, 15: 57.14285714285714} + let mut size_ranks = SizeRankMap::new(); + + for key in self.sizes.keys() { + size_ranks.insert( + *key, + (*self.sizes.get(key).unwrap() as f64 / self.size_total as f64) * 100.0, + ); + } + + // sort the ranks by percentages in decreasing order + // -> [(15, 57.14285714285714), (11, 28.57142857142857), (14, 14.285714285714285)] + let mut sizes = size_ranks.iter().collect::>(); + sizes.sort_by(|&(_, a), &(_, b)| b.partial_cmp(&a).unwrap()); + + // calculate the cumulative sum of the size rankings + // -> [(15, 57.14285714285714), (11, 85.71428571428571), (14, 100)] + self.size_ranks = sizes + .iter() + .scan((0 as u32, 0.00 as f64), |state, &(&k, &v)| { + *state = (k, state.1 + &v); + Some(*state) + }) + .collect::>(); + } + + /// This function generates realistic test data based on the sampel data that was analyzed. + /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { /// let mut profile = Profile::new(); /// /// profile.analyze("One"); @@ -570,40 +590,45 @@ impl Profile { /// /// print!("The test data {:?} was generated.", profile.generate()); /// } - /// ``` - pub fn generate(&mut self) -> String{ - // 1. get a random number - let s: f64 = random_percentage!(); - - // 2. find the first pattern that falls within the percentage chance of occurring - // NOTE: The following 2 lines has been commented out because this doesn't need to - // happen since the patterns are already ranks by percent chance of occurring - // and therefore sizes (lengths) as well since the patterns include the full - // length of the entitiy analyzed. - //let size = self.size_ranks.iter().find(|&&x|&x.1 >= &s).unwrap().0; - //let pattern = self.pattern_ranks.iter().find(|x|&x.1 >= &s && x.0.len() == size as usize).unwrap().clone(); - let pattern = self.pattern_ranks.iter().find(|x|&x.1 >= &s).unwrap().clone(); - - // lastly, generate the test data using facts that adhere to the pattern - let generated = self.generate_from_pattern(pattern.0); - - generated - } - - /// This function generates realistic test data based on the sample data that was analyzed. - /// - /// # Arguments - /// - /// * `pattern: String` - The pattern to reference when generating the test data.
- /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { + /// ``` + pub fn generate(&mut self) -> String { + // 1. get a random number + let s: f64 = random_percentage!(); + + // 2. find the first pattern that falls within the percentage chance of occurring + // NOTE: The following 2 lines has been commented out because this doesn't need to + // happen since the patterns are already ranks by percent chance of occurring + // and therefore sizes (lengths) as well since the patterns include the full + // length of the entitiy analyzed. + //let size = self.size_ranks.iter().find(|&&x|&x.1 >= &s).unwrap().0; + //let pattern = self.pattern_ranks.iter().find(|x|&x.1 >= &s && x.0.len() == size as usize).unwrap().clone(); + let pattern = self + .pattern_ranks + .iter() + .find(|x| &x.1 >= &s) + .unwrap() + .clone(); + + // lastly, generate the test data using facts that adhere to the pattern + let generated = self.generate_from_pattern(pattern.0); + + generated + } + + /// This function generates realistic test data based on the sample data that was analyzed. + /// + /// # Arguments + /// + /// * `pattern: String` - The pattern to reference when generating the test data.
+ /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { /// let mut profile = Profile::new(); /// /// profile.analyze("01/13/2017"); @@ -616,220 +641,222 @@ impl Profile { /// /// assert_eq!(generated.len(), 10); /// } - /// ``` - pub fn generate_from_pattern(&self, pattern: String) -> String { - let pattern_chars = pattern.chars().collect::>(); - let mut generated = String::new(); - let prev_char = ' '; - - // iterate through the chars in the pattern string - for (idx, ch) in pattern_chars.iter().enumerate() { - match crossbeam::scope(|scope| { - let c = ch; - let starts = if idx == 0 { 1 } else { 0 }; - let ends = if idx == pattern_chars.len()-1 { 1 } else { 0 }; - let mut fact_options = vec![]; - let prior_char = prev_char; - - // iterate through the processors (vec) that hold the lists (vec) of facts - for v in &self.facts { - let selected_facts = scope.spawn(move |_| { - let mut facts = vec![]; - - // iterate through the list of facts - for value in v { - if value.starts_with == starts && - value.ends_with == ends && - value.pattern_placeholder == *c && - value.index_offset == idx as u32 { - facts.push(value.key.clone()); - - // if the value.key's prior char matches the prior generated char, then weight the value.key - // to increase the chance of it being used when generated - if value.prior_key.unwrap_or(' ') == prior_char { - facts.push(value.key.clone()); - facts.push(value.key.clone()); - } - - // if the value.key's index_offset matches the current index, then weight the value.key - // to increase the chance of it being used when generated - if value.index_offset == idx as u32 { - facts.push(value.key.clone()); - facts.push(value.key.clone()); - } - } - } - - facts - }); - - //append the selected_facts to the fact_options - //fact_options.extend_from_slice(&selected_facts.join()); - match selected_facts.join() { - Ok(sf) => fact_options.extend_from_slice(&sf), - Err(err) => { - error!("{:?}", err); - panic!("{:?}", err); - } - } - } - - //select a fact to use as the generated char - let rnd_start = 0; - let rnd_end = fact_options.len()-1; - - if rnd_start >= rnd_end { - //generated.push(fact_options[0 as usize]); - fact_options[0 as usize] - }else{ - let x: u32 = random_between!(rnd_start, rnd_end); - //prev_char = fact_options[x as usize]; - //generated.push(prev_char); - fact_options[x as usize] - } - }) { - Ok(c) => generated.push(c), - Err(err) => { - error!("{:?}", err); - panic!("{:?}", err); - }, - } - } - - generated - } - - /// This function learns by measuring how realistic the test data it generates to the sample data that was provided. - /// - /// # Arguments - /// - /// * `control_list: Vec` - The list of strings to compare against. This would be the real data from the data sample.
- /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { - /// let mut profil = Profile::new(); - /// let sample_data = vec!("Smith, John".to_string(),"Doe, John".to_string(),"Dale, Danny".to_string(),"Rickets, Ronney".to_string()); - /// - /// for sample in sample_data.iter().clone() { - /// profil.analyze(&sample); - /// } - /// - /// // in order to learn the profile must be prepared with pre_genrate() - /// // so it can generate data to learn from - /// profil.pre_generate(); - /// - /// let learning = profil.learn_from_entity(sample_data).unwrap(); - /// - /// assert_eq!(learning, true); + /// ``` + pub fn generate_from_pattern(&self, pattern: String) -> String { + let pattern_chars = pattern.chars().collect::>(); + let mut generated = String::new(); + let prev_char = ' '; + + // iterate through the chars in the pattern string + for (idx, ch) in pattern_chars.iter().enumerate() { + match crossbeam::scope(|scope| { + let c = ch; + let starts = if idx == 0 { 1 } else { 0 }; + let ends = if idx == pattern_chars.len() - 1 { 1 } else { 0 }; + let mut fact_options = vec![]; + let prior_char = prev_char; + + // iterate through the processors (vec) that hold the lists (vec) of facts + for v in &self.facts { + let selected_facts = scope.spawn(move |_| { + let mut facts = vec![]; + + // iterate through the list of facts + for value in v { + if value.starts_with == starts + && value.ends_with == ends + && value.pattern_placeholder == *c + && value.index_offset == idx as u32 + { + facts.push(value.key.clone()); + + // if the value.key's prior char matches the prior generated char, then weight the value.key + // to increase the chance of it being used when generated + if value.prior_key.unwrap_or(' ') == prior_char { + facts.push(value.key.clone()); + facts.push(value.key.clone()); + } + + // if the value.key's index_offset matches the current index, then weight the value.key + // to increase the chance of it being used when generated + if value.index_offset == idx as u32 { + facts.push(value.key.clone()); + facts.push(value.key.clone()); + } + } + } + + facts + }); + + //append the selected_facts to the fact_options + //fact_options.extend_from_slice(&selected_facts.join()); + match selected_facts.join() { + Ok(sf) => fact_options.extend_from_slice(&sf), + Err(err) => { + error!("{:?}", err); + panic!("{:?}", err); + } + } + } + + //select a fact to use as the generated char + let rnd_start = 0; + let rnd_end = fact_options.len() - 1; + + if rnd_start >= rnd_end { + //generated.push(fact_options[0 as usize]); + fact_options[0 as usize] + } else { + let x: u32 = random_between!(rnd_start, rnd_end); + //prev_char = fact_options[x as usize]; + //generated.push(prev_char); + fact_options[x as usize] + } + }) { + Ok(c) => generated.push(c), + Err(err) => { + error!("{:?}", err); + panic!("{:?}", err); + } + } + } + + generated + } + + /// This function learns by measuring how realistic the test data it generates to the sample data that was provided. + /// + /// # Arguments + /// + /// * `control_list: Vec` - The list of strings to compare against. This would be the real data from the data sample.
+ /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { + /// let mut profil = Profile::new(); + /// let sample_data = vec!("Smith, John".to_string(),"Doe, John".to_string(),"Dale, Danny".to_string(),"Rickets, Ronney".to_string()); + /// + /// for sample in sample_data.iter().clone() { + /// profil.analyze(&sample); + /// } + /// + /// // in order to learn the profile must be prepared with pre_genrate() + /// // so it can generate data to learn from + /// profil.pre_generate(); + /// + /// let learning = profil.learn_from_entity(sample_data).unwrap(); + /// + /// assert_eq!(learning, true); + /// } + /// ``` + pub fn learn_from_entity(&mut self, control_list: Vec) -> Result { + for _n in 0..10 { + let experiment = self.generate(); + let mut percent_similarity: Vec = Vec::new(); + + for control in control_list.iter().clone() { + debug!("Comparing {} with {} ...", &control, &experiment); + percent_similarity.push(self.realistic_test(&control, &experiment)); + } + + let percent = + percent_similarity.iter().sum::() as f64 / percent_similarity.len() as f64; + debug!("Percent similarity is {} ...", &percent); + + if percent >= 80 as f64 { + self.analyze(&experiment); + } + } + + Ok(true) + } + + /// This function calculates the levenshtein distance between 2 strings. + /// See: https://crates.io/crates/levenshtein + /// + /// # Arguments + /// + /// * `control: &String` - The string to compare against. This would be the real data from the data sample.
+ /// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the distance.
+ /// + /// #Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { + /// let mut profile = Profile::new(); + /// + /// assert_eq!(profile.levenshtein_distance(&"kitten".to_string(), &"sitting".to_string()), 3 as usize); + /// } + /// + pub fn levenshtein_distance(&mut self, control: &String, experiment: &String) -> usize { + // https://docs.rs/levenshtein/1.0.3/levenshtein/fn.levenshtein.html + levenshtein_distance!(control, experiment) + } + + /// This function calculates the percent difference between 2 strings. + /// + /// # Arguments + /// + /// * `control: &String` - The string to compare against. This would be the real data from the data sample.
+ /// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the percent difference.
+ /// + /// #Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { + /// let mut profile = Profile::new(); + /// + /// assert_eq!(profile.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64); /// } - /// ``` - pub fn learn_from_entity(&mut self, control_list: Vec) -> Result { - for _n in 0..10 { - let experiment = self.generate(); - let mut percent_similarity: Vec = Vec::new(); - - for control in control_list.iter().clone() { - debug!("Comparing {} with {} ...", &control, &experiment); - percent_similarity.push(self.realistic_test(&control, &experiment)); - } - - let percent = percent_similarity.iter().sum::() as f64 / percent_similarity.len() as f64; - debug!("Percent similarity is {} ...", &percent); - - if percent >= 80 as f64 { - self.analyze(&experiment); - } - } - - Ok(true) - } - - /// This function calculates the levenshtein distance between 2 strings. - /// See: https://crates.io/crates/levenshtein - /// - /// # Arguments - /// - /// * `control: &String` - The string to compare against. This would be the real data from the data sample.
- /// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the distance.
- /// - /// #Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { - /// let mut profile = Profile::new(); - /// - /// assert_eq!(profile.levenshtein_distance(&"kitten".to_string(), &"sitting".to_string()), 3 as usize); - /// } - /// - pub fn levenshtein_distance(&mut self, control: &String, experiment: &String) -> usize { - // https://docs.rs/levenshtein/1.0.3/levenshtein/fn.levenshtein.html - levenshtein_distance!(control, experiment) - } - - /// This function calculates the percent difference between 2 strings. - /// - /// # Arguments - /// - /// * `control: &String` - The string to compare against. This would be the real data from the data sample.
- /// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the percent difference.
- /// - /// #Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { - /// let mut profile = Profile::new(); - /// - /// assert_eq!(profile.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64); - /// } - /// - pub fn realistic_test(&mut self, control: &String, experiment: &String) -> f64 { - realistic_test!(control, experiment) - } - - /// This function is called from within the implementated structure and returns a list processors (Vec) with empty lists (Vec) for their Facts. - /// Each processor shares the load of generating the data based on the Facts it has been assigned to manage. - /// - /// # Arguments - /// - /// * `p: u8` - A number that sets the number of processors to start up to manage the Facts.
- /// Increasing the number of processors will speed up the generator be ditributing the workload. - /// The recommended number of processors is 1 per 10K data points (e.g.: profiling 20K names should be handled by 2 processors)
- /// NOTE: The default number of processors is 4. - /// - fn new_facts(p: u8) -> Vec> { - let mut vec_main = Vec::new(); - - for _ in 0..p { - vec_main.push(Vec::new()); - } - - vec_main - } - - /// This function prepares the size a pattern accumulated percentages order by percentage increasing - /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { + /// + pub fn realistic_test(&mut self, control: &String, experiment: &String) -> f64 { + realistic_test!(control, experiment) + } + + /// This function is called from within the implementated structure and returns a list processors (Vec) with empty lists (Vec) for their Facts. + /// Each processor shares the load of generating the data based on the Facts it has been assigned to manage. + /// + /// # Arguments + /// + /// * `p: u8` - A number that sets the number of processors to start up to manage the Facts.
+ /// Increasing the number of processors will speed up the generator be ditributing the workload. + /// The recommended number of processors is 1 per 10K data points (e.g.: profiling 20K names should be handled by 2 processors)
+ /// NOTE: The default number of processors is 4. + /// + fn new_facts(p: u8) -> Vec> { + let mut vec_main = Vec::new(); + + for _ in 0..p { + vec_main.push(Vec::new()); + } + + vec_main + } + + /// This function prepares the size a pattern accumulated percentages order by percentage increasing + /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { /// let mut profile = Profile::new(); /// profile.analyze("One"); /// profile.analyze("Two"); @@ -843,25 +870,25 @@ impl Profile { /// print!("The size ranks are {:?}", profile.size_ranks); /// // The size ranks are [(3, 50), (4, 83.33333333333333), (5, 100)] /// } - /// ``` - pub fn pre_generate(&mut self){ - info!("Preparing the profile for data generation..."); - self.cum_sizemap(); - self.cum_patternmap(); - info!("Profile: preparing generator..."); - } - - /// This function resets the patterns that the Profile has analyzed. - /// Call this method whenever you wish to "clear" the Profile - /// - /// # Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { + /// ``` + pub fn pre_generate(&mut self) { + info!("Preparing the profile for data generation..."); + self.cum_sizemap(); + self.cum_patternmap(); + info!("Profile: preparing generator..."); + } + + /// This function resets the patterns that the Profile has analyzed. + /// Call this method whenever you wish to "clear" the Profile + /// + /// # Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { /// let mut profile = Profile::new(); /// /// profile.analyze("One"); @@ -885,336 +912,361 @@ impl Profile { /// assert_eq!(x, 3); /// assert_eq!(y, 5); /// } - /// ``` - pub fn reset_analyze(&mut self) { - info!("Resetting the profile ..."); - self.patterns = PatternMap::new(); - info!("Profile: patterns have been reset ..."); - } - - /// This function saves (exports) the Profile to a JSON file. - /// This is useful when you wish to reuse the algorithm to generate more test data later. - /// - /// # Arguments - /// - /// * `field: String` - The full path of the export file , excluding the file extension, (e.g.: "./test/data/custom-names").
- /// - /// #Errors - /// If this function encounters any form of I/O or other error, an error variant will be returned. - /// Otherwise, the function returns Ok(true).
- /// - /// #Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { - /// // analyze the dataset - /// let mut profile = Profile::new(); - /// profile.analyze("Smith, John"); + /// ``` + pub fn reset_analyze(&mut self) { + info!("Resetting the profile ..."); + self.patterns = PatternMap::new(); + info!("Profile: patterns have been reset ..."); + } + + /// This function saves (exports) the Profile to a JSON file. + /// This is useful when you wish to reuse the algorithm to generate more test data later. + /// + /// # Arguments + /// + /// * `field: String` - The full path of the export file , excluding the file extension, (e.g.: "./test/data/custom-names").
+ /// + /// #Errors + /// If this function encounters any form of I/O or other error, an error variant will be returned. + /// Otherwise, the function returns Ok(true).
+ /// + /// #Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { + /// // analyze the dataset + /// let mut profile = Profile::new(); + /// profile.analyze("Smith, John"); /// profile.analyze("O'Brian, Henny"); /// profile.analyze("Dale, Danny"); /// profile.analyze("Rickets, Ronney"); /// /// profile.pre_generate(); - /// + /// /// assert_eq!(profile.save("./tests/samples/sample-00-profile").unwrap(), true); - /// } - /// - pub fn save(&mut self, path: &'static str) -> Result { - let dsp_json = serde_json::to_string(&self).unwrap(); - - // Create the archive file - let mut file = match File::create(format!("{}.json",&path)) { - Err(e) => { - error!("Could not create file {:?}", &path.to_string()); - return Err(e); - }, - Ok(f) => { - info!("Successfully exported to {:?}", &path.to_string()); - f - }, - }; - - // Write the json string to file, returns io::Result<()> - match file.write_all(dsp_json.as_bytes()) { - Err(e) => { - error!("Could not write to file {}", &path.to_string()); - return Err(e); - }, - Ok(_) => { - info!("Successfully exported to {}", &path.to_string()); - }, - }; - - Ok(true) - } - - /// This function converts the Profile to a serialize JSON string. - /// - /// #Example - /// - /// ```rust - /// extern crate test_data_generation; - /// - /// use test_data_generation::Profile; - /// - /// fn main() { - /// // analyze the dataset - /// let mut data_profile = Profile::new(); - /// - /// // analyze the dataset - /// data_profile.analyze("OK"); - /// + /// } + /// + pub fn save(&mut self, path: &'static str) -> Result { + let dsp_json = serde_json::to_string(&self).unwrap(); + + // Create the archive file + let mut file = match File::create(format!("{}.json", &path)) { + Err(e) => { + error!("Could not create file {:?}", &path.to_string()); + return Err(e); + } + Ok(f) => { + info!("Successfully exported to {:?}", &path.to_string()); + f + } + }; + + // Write the json string to file, returns io::Result<()> + match file.write_all(dsp_json.as_bytes()) { + Err(e) => { + error!("Could not write to file {}", &path.to_string()); + return Err(e); + } + Ok(_) => { + info!("Successfully exported to {}", &path.to_string()); + } + }; + + Ok(true) + } + + /// This function converts the Profile to a serialize JSON string. + /// + /// #Example + /// + /// ```rust + /// extern crate test_data_generation; + /// + /// use test_data_generation::Profile; + /// + /// fn main() { + /// // analyze the dataset + /// let mut data_profile = Profile::new(); + /// + /// // analyze the dataset + /// data_profile.analyze("OK"); + /// /// println!("{}", data_profile.serialize()); /// // {"patterns":{"VC":1},"pattern_total":1,"pattern_keys":["VC"],"pattern_vals":[1],"pattern_percentages":[],"pattern_ranks":[],"sizes":{"2":1},"size_total":1,"size_ranks":[],"processors":4,"facts":[[{"key":"O","prior_key":null,"next_key":"K","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0}],[{"key":"K","prior_key":"O","next_key":null,"pattern_placeholder":"C","starts_with":0,"ends_with":1,"index_offset":1}],[],[]]} - /// } - /// - pub fn serialize(&mut self) ->String { - serde_json::to_string(&self).unwrap() - } + /// } + /// + pub fn serialize(&mut self) -> String { + serde_json::to_string(&self).unwrap() + } } #[macro_use] pub mod macros; -pub mod shared; -pub mod data_sample_parser; pub mod configs; +pub mod data_sample_parser; pub mod engine; +pub mod shared; // Unit Tests #[cfg(test)] mod tests { use super::*; - #[test] - fn apply_facts(){ - let mut profile = Profile::new(); - let results = PatternDefinition::new().analyze("Word"); + #[test] + fn apply_facts() { + let mut profile = Profile::new(); + let results = PatternDefinition::new().analyze("Word"); - assert_eq!(profile.apply_facts(results.0, results.1).unwrap(),1); - } + assert_eq!(profile.apply_facts(results.0, results.1).unwrap(), 1); + } - #[test] - fn levenshtein_test(){ - let mut profil = Profile::new(); + #[test] + fn levenshtein_test() { + let mut profil = Profile::new(); - assert_eq!(profil.levenshtein_distance(&"kitten".to_string(), &"sitting".to_string()), 3 as usize); + assert_eq!( + profil.levenshtein_distance(&"kitten".to_string(), &"sitting".to_string()), + 3 as usize + ); } - #[test] - fn realistic_data_test(){ - let mut profil = Profile::new(); + #[test] + fn realistic_data_test() { + let mut profil = Profile::new(); - assert_eq!(profil.realistic_test(&"kitten".to_string(), &"sitting".to_string()), 76.92307692307692 as f64); - } + assert_eq!( + profil.realistic_test(&"kitten".to_string(), &"sitting".to_string()), + 76.92307692307692 as f64 + ); + } + + #[test] + fn learn_from_entity() { + let mut profil = Profile::new(); + let sample_data = vec![ + "Smith, John".to_string(), + "Doe, John".to_string(), + "Dale, Danny".to_string(), + "Rickets, Ronney".to_string(), + ]; - #[test] - fn learn_from_entity(){ - let mut profil = Profile::new(); - let sample_data = vec!("Smith, John".to_string(),"Doe, John".to_string(),"Dale, Danny".to_string(),"Rickets, Ronney".to_string()); + for sample in sample_data.iter().clone() { + profil.analyze(&sample); + } - for sample in sample_data.iter().clone() { - profil.analyze(&sample); - } + profil.pre_generate(); - profil.pre_generate(); + let learning = profil.learn_from_entity(sample_data).unwrap(); - let learning = profil.learn_from_entity(sample_data).unwrap(); + assert_eq!(learning, true); + } - assert_eq!(learning, true); - } + #[test] + fn logging_test() { + let mut profile = Profile::new(); + profile.reset_analyze(); - #[test] - fn logging_test(){ - let mut profile = Profile::new(); - profile.reset_analyze(); + assert!(true); + } - assert!(true); - } - - #[test] - fn new_profile_with_id(){ - let mut profile = Profile::new_with_id("12345".to_string()); - profile.pre_generate(); + #[test] + fn new_profile_with_id() { + let mut profile = Profile::new_with_id("12345".to_string()); + profile.pre_generate(); - assert_eq!(profile.id.unwrap(), "12345".to_string()); + assert_eq!(profile.id.unwrap(), "12345".to_string()); } - #[test] - fn new_profile_from_file(){ - let mut profile = Profile::from_file("./tests/samples/sample-00-profile"); - profile.pre_generate(); + #[test] + fn new_profile_from_file() { + let mut profile = Profile::from_file("./tests/samples/sample-00-profile"); + profile.pre_generate(); - assert!(profile.generate().len() > 0); + assert!(profile.generate().len() > 0); } #[test] #[should_panic] - fn new_profile_from_file_bad_data(){ - let mut profile = Profile::from_file("./tests/samples/not-readable"); - profile.pre_generate(); + fn new_profile_from_file_bad_data() { + let mut profile = Profile::from_file("./tests/samples/not-readable"); + profile.pre_generate(); - assert!(profile.generate().len() > 0); + assert!(profile.generate().len() > 0); } #[test] #[should_panic(expected = "Could not open file \"./tests/samples/bad-path\"")] - fn new_profile_from_file_bad_path(){ - let mut profile = Profile::from_file("./tests/samples/bad-path"); - profile.pre_generate(); + fn new_profile_from_file_bad_path() { + let mut profile = Profile::from_file("./tests/samples/bad-path"); + profile.pre_generate(); - assert!(profile.generate().len() > 0); + assert!(profile.generate().len() > 0); } - #[test] - fn new_profile_from_serialized(){ - let serialized = "{\"patterns\":{\"VC\":1},\"pattern_total\":1,\"pattern_keys\":[\"VC\"],\"pattern_vals\":[1],\"pattern_percentages\":[],\"pattern_ranks\":[],\"sizes\":{\"2\":1},\"size_total\":1,\"size_ranks\":[],\"processors\":4,\"facts\":[[{\"key\":\"O\",\"prior_key\":null,\"next_key\":\"K\",\"pattern_placeholder\":\"V\",\"starts_with\":1,\"ends_with\":0,\"index_offset\":0}],[{\"key\":\"K\",\"prior_key\":\"O\",\"next_key\":null,\"pattern_placeholder\":\"C\",\"starts_with\":0,\"ends_with\":1,\"index_offset\":1}],[],[]]}"; - let mut profile = Profile::from_serialized(&serialized); - profile.pre_generate(); + #[test] + fn new_profile_from_serialized() { + let serialized = "{\"patterns\":{\"VC\":1},\"pattern_total\":1,\"pattern_keys\":[\"VC\"],\"pattern_vals\":[1],\"pattern_percentages\":[],\"pattern_ranks\":[],\"sizes\":{\"2\":1},\"size_total\":1,\"size_ranks\":[],\"processors\":4,\"facts\":[[{\"key\":\"O\",\"prior_key\":null,\"next_key\":\"K\",\"pattern_placeholder\":\"V\",\"starts_with\":1,\"ends_with\":0,\"index_offset\":0}],[{\"key\":\"K\",\"prior_key\":\"O\",\"next_key\":null,\"pattern_placeholder\":\"C\",\"starts_with\":0,\"ends_with\":1,\"index_offset\":1}],[],[]]}"; + let mut profile = Profile::from_serialized(&serialized); + profile.pre_generate(); - assert_eq!(profile.generate(), "OK"); + assert_eq!(profile.generate(), "OK"); } #[test] - fn new_profile_new_with(){ - let profile = Profile::new_with_processors(10); + fn new_profile_new_with() { + let profile = Profile::new_with_processors(10); assert_eq!(profile.processors, 10); - } + } #[test] // ensure Profile is analyzing all the sample data points - fn profile_analyze(){ - let mut profil = Profile::new(); - profil.analyze("Smith, John"); - profil.analyze("O'Brian, Henny"); - profil.analyze("Dale, Danny"); - profil.analyze("Rickets, Ronney"); - - assert_eq!(profil.patterns.len(), 4); + fn profile_analyze() { + let mut profil = Profile::new(); + profil.analyze("Smith, John"); + profil.analyze("O'Brian, Henny"); + profil.analyze("Dale, Danny"); + profil.analyze("Rickets, Ronney"); + + assert_eq!(profil.patterns.len(), 4); } #[test] // ensure Profile is able to find the facts that relate to a pattern // NOTE: Dates need work! e.g.: 00/15/0027 - fn profile_generate_from_pattern_date(){ - let mut profil = Profile::new(); - profil.analyze("01/13/2017"); - profil.analyze("11/24/2017"); - profil.analyze("08/05/2017"); + fn profile_generate_from_pattern_date() { + let mut profil = Profile::new(); + profil.analyze("01/13/2017"); + profil.analyze("11/24/2017"); + profil.analyze("08/05/2017"); - profil.pre_generate(); - let generated = profil.generate_from_pattern("##p##p####".to_string()); + profil.pre_generate(); + let generated = profil.generate_from_pattern("##p##p####".to_string()); - assert_eq!(10, generated.len()); + assert_eq!(10, generated.len()); } #[test] // ensure Profile is able to find the facts that relate to a pattern - fn profile_generate_from_pattern_string(){ - let mut profil = Profile::new(); - profil.analyze("First"); - profil.analyze("Next"); - profil.analyze("Last"); + fn profile_generate_from_pattern_string() { + let mut profil = Profile::new(); + profil.analyze("First"); + profil.analyze("Next"); + profil.analyze("Last"); - profil.pre_generate(); - let generated = profil.generate_from_pattern("Cvcc".to_string()); + profil.pre_generate(); + let generated = profil.generate_from_pattern("Cvcc".to_string()); - assert_eq!(4, generated.len()); + assert_eq!(4, generated.len()); } #[test] // ensure Profile is generating correct test data - fn profile_generate(){ - let mut profil = Profile::new(); - profil.analyze("Smith, John"); - profil.analyze("O'Brian, Henny"); - profil.analyze("Dale, Danny"); - profil.analyze("Rickets, Ronnae"); - profil.analyze("Richard, Richie"); - profil.analyze("Roberts, Blake"); - profil.analyze("Conways, Sephen"); - - profil.pre_generate(); - - assert!(profil.generate().len() > 10); + fn profile_generate() { + let mut profil = Profile::new(); + profil.analyze("Smith, John"); + profil.analyze("O'Brian, Henny"); + profil.analyze("Dale, Danny"); + profil.analyze("Rickets, Ronnae"); + profil.analyze("Richard, Richie"); + profil.analyze("Roberts, Blake"); + profil.analyze("Conways, Sephen"); + + profil.pre_generate(); + + assert!(profil.generate().len() > 10); } #[test] // issue #31 // ensure Profile doesn't generate a name with a backslash preceding an apostrophe - fn profile_generate_with_apostrophe(){ - let mut profil = Profile::new(); - profil.analyze("O'Brien"); + fn profile_generate_with_apostrophe() { + let mut profil = Profile::new(); + profil.analyze("O'Brien"); - profil.pre_generate(); - let generated = profil.generate(); + profil.pre_generate(); + let generated = profil.generate(); - assert_eq!(generated, "O'Brien"); + assert_eq!(generated, "O'Brien"); } #[test] // ensure Profile is providing the correct pattern ranks after analyzing the sample data - fn profile_pregenerate_patterns(){ - let mut profil = Profile::new(); - profil.analyze("Smith, John"); - profil.analyze("O'Brian, Henny"); - profil.analyze("Dale, Danny"); - profil.analyze("Rickets, Ronnae"); - profil.analyze("Richard, Richie"); - profil.analyze("Roberts, Blake"); - profil.analyze("Conways, Sephen"); - - profil.pre_generate(); - let test = [("CvccvccpSCvccvv".to_string(), 28.57142857142857 as f64), ("CcvccpSCvcc".to_string(), 42.857142857142854 as f64), ("CvccvccpSCvccvc".to_string(), 57.14285714285714 as f64), ("CvcvcccpSCcvcv".to_string(), 71.42857142857142 as f64), ("CvcvpSCvccc".to_string(), 85.7142857142857 as f64), ("V@CcvvcpSCvccc".to_string(), 99.99999999999997 as f64)]; - - assert_eq!(profil.pattern_ranks, test); + fn profile_pregenerate_patterns() { + let mut profil = Profile::new(); + profil.analyze("Smith, John"); + profil.analyze("O'Brian, Henny"); + profil.analyze("Dale, Danny"); + profil.analyze("Rickets, Ronnae"); + profil.analyze("Richard, Richie"); + profil.analyze("Roberts, Blake"); + profil.analyze("Conways, Sephen"); + + profil.pre_generate(); + let test = [ + ("CvccvccpSCvccvv".to_string(), 28.57142857142857 as f64), + ("CcvccpSCvcc".to_string(), 42.857142857142854 as f64), + ("CvccvccpSCvccvc".to_string(), 57.14285714285714 as f64), + ("CvcvcccpSCcvcv".to_string(), 71.42857142857142 as f64), + ("CvcvpSCvccc".to_string(), 85.7142857142857 as f64), + ("V@CcvvcpSCvccc".to_string(), 99.99999999999997 as f64), + ]; + + assert_eq!(profil.pattern_ranks, test); } #[test] // ensure Profile is providing the correct pattern ranks after analyzing the sample data - fn profile_pregenerate_sizes(){ - let mut profil = Profile::new(); - - profil.analyze("Smith, Johny"); //12 - profil.analyze("O'Brian, Hen"); //12 - profil.analyze("Dale, Danny"); //11 - profil.analyze("O'Henry, Al"); //11 - profil.analyze("Rickets, Ro"); //11 - profil.analyze("Mr. Wilbers"); //11 - profil.analyze("Po, Al"); //6 - - profil.pre_generate(); - let test = [(11, 57.14285714285714), (12, 85.71428571428571), (6, 100 as f64)]; - - assert_eq!(profil.size_ranks, test); + fn profile_pregenerate_sizes() { + let mut profil = Profile::new(); + + profil.analyze("Smith, Johny"); //12 + profil.analyze("O'Brian, Hen"); //12 + profil.analyze("Dale, Danny"); //11 + profil.analyze("O'Henry, Al"); //11 + profil.analyze("Rickets, Ro"); //11 + profil.analyze("Mr. Wilbers"); //11 + profil.analyze("Po, Al"); //6 + + profil.pre_generate(); + let test = [ + (11, 57.14285714285714), + (12, 85.71428571428571), + (6, 100 as f64), + ]; + + assert_eq!(profil.size_ranks, test); } - #[test] - fn save_profile(){ - let mut profile = Profile::new(); - profile.analyze("Smith, John"); - profile.analyze("O'Brian, Henny"); - profile.analyze("Dale, Danny"); - profile.analyze("Rickets, Ronney"); - - profile.pre_generate(); - - assert_eq!(profile.save("./tests/samples/sample-00-profile").unwrap(), true); - } + #[test] + fn save_profile() { + let mut profile = Profile::new(); + profile.analyze("Smith, John"); + profile.analyze("O'Brian, Henny"); + profile.analyze("Dale, Danny"); + profile.analyze("Rickets, Ronney"); + + profile.pre_generate(); + + assert_eq!( + profile.save("./tests/samples/sample-00-profile").unwrap(), + true + ); + } #[test] // ensure a Profile can be exported (to be archived) as JSON - fn serialize(){ - let mut profil = Profile::new(); + fn serialize() { + let mut profil = Profile::new(); - // analyze the dataset - profil.analyze("OK"); + // analyze the dataset + profil.analyze("OK"); - let serialized = profil.serialize(); - assert_eq!(serialized, "{\"id\":null,\"patterns\":{\"VC\":1},\"pattern_total\":1,\"pattern_keys\":[\"VC\"],\"pattern_vals\":[1],\"pattern_percentages\":[],\"pattern_ranks\":[],\"sizes\":{\"2\":1},\"size_total\":1,\"size_ranks\":[],\"processors\":4,\"facts\":[[{\"key\":\"O\",\"prior_key\":null,\"next_key\":\"K\",\"pattern_placeholder\":\"V\",\"starts_with\":1,\"ends_with\":0,\"index_offset\":0}],[{\"key\":\"K\",\"prior_key\":\"O\",\"next_key\":null,\"pattern_placeholder\":\"C\",\"starts_with\":0,\"ends_with\":1,\"index_offset\":1}],[],[]]}"); + let serialized = profil.serialize(); + assert_eq!(serialized, "{\"id\":null,\"patterns\":{\"VC\":1},\"pattern_total\":1,\"pattern_keys\":[\"VC\"],\"pattern_vals\":[1],\"pattern_percentages\":[],\"pattern_ranks\":[],\"sizes\":{\"2\":1},\"size_total\":1,\"size_ranks\":[],\"processors\":4,\"facts\":[[{\"key\":\"O\",\"prior_key\":null,\"next_key\":\"K\",\"pattern_placeholder\":\"V\",\"starts_with\":1,\"ends_with\":0,\"index_offset\":0}],[{\"key\":\"K\",\"prior_key\":\"O\",\"next_key\":null,\"pattern_placeholder\":\"C\",\"starts_with\":0,\"ends_with\":1,\"index_offset\":1}],[],[]]}"); } } diff --git a/src/macros.rs b/src/macros.rs index 56925f8..a5fa1d7 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,111 +1,102 @@ -/// This macro calculates the levenshtein distance between 2 strings. -/// See: https://crates.io/crates/levenshtein -/// -/// # Arguments -/// -/// * `control: &String` - The string to compare against. This would be the real data from the data sample.
-/// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the distance.
-/// -/// #Example -/// -/// ```rust -/// # #[macro_use] extern crate test_data_generation; extern crate levenshtein; -/// # fn main() { -/// assert_eq!(levenshtein_distance!("kitten", "sitting"), 3 as usize); -/// # } -/// -#[macro_export] -macro_rules! levenshtein_distance { - ( $c:expr, $e:expr ) => { - { - use levenshtein; - - levenshtein::levenshtein($c, $e) - } - } -} - -/// This macro generates a random number between 0 and 100. -/// Returns a f64. -/// -/// # Example -/// -/// ```rust -/// # #[macro_use] extern crate test_data_generation; extern crate rand; -/// # fn main() { -/// let rnd: f64 = random_percentage!(); -/// println!("Your random number is {}", rnd); -/// # } -/// ``` -#[macro_export] -macro_rules! random_percentage { - ( $( $x:expr ),* ) => { - { - use rand::{thread_rng, Rng}; - - let mut rng = thread_rng(); - - rng.gen_range::(0 as f64, 100 as f64) - - } - }; -} - -/// This macro generates a random number for a given range. -/// Returns a u32. -/// -/// # Arguments -/// -/// * `a: u32` - The lowest number of the range to use for the random number.
-/// * `b: u32` - The highest number of the range to use for the random number.
-/// -/// # Example -/// -/// ```rust -/// # #[macro_use] extern crate test_data_generation; extern crate rand; -/// # fn main() { -/// let rnd: u32 = random_between!(0, 100); -/// println!("Your random number is {}", rnd); -/// # } -/// ``` -#[macro_export] -macro_rules! random_between { - ($a:expr, $b:expr) => { - { - use rand::{thread_rng, Rng}; - - let mut rng = thread_rng(); - let nbr = rng.gen_range::($a as u32, $b as u32); - - nbr - } - }; -} - -/// This function calculates the percent difference between 2 strings. -/// -/// # Arguments -/// -/// * `control: &String` - The string to compare against. This would be the real data from the data sample.
-/// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the percent difference.
-/// -/// # Example -/// -/// ```rust -/// # #[macro_use] extern crate test_data_generation; extern crate levenshtein; -/// -/// # fn main() { -/// assert_eq!(realistic_test!("kitten", "sitting"), 76.92307692307692 as f64); -/// # } -/// -#[macro_export] -macro_rules! realistic_test { - ( $c:expr, $e:expr ) => { - { - let ld: f64 = levenshtein_distance!($c, $e) as f64; - let total: f64 = $c.len() as f64 + $e.len() as f64; - let diff: f64 = total - ld; - (1 as f64 - ((total - diff)/total)) * 100 as f64 - } - } -} +/// This macro calculates the levenshtein distance between 2 strings. +/// See: https://crates.io/crates/levenshtein +/// +/// # Arguments +/// +/// * `control: &String` - The string to compare against. This would be the real data from the data sample.
+/// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the distance.
+/// +/// #Example +/// +/// ```rust +/// # #[macro_use] extern crate test_data_generation; extern crate levenshtein; +/// # fn main() { +/// assert_eq!(levenshtein_distance!("kitten", "sitting"), 3 as usize); +/// # } +/// +#[macro_export] +macro_rules! levenshtein_distance { + ( $c:expr, $e:expr ) => {{ + use levenshtein; + + levenshtein::levenshtein($c, $e) + }}; +} + +/// This macro generates a random number between 0 and 100. +/// Returns a f64. +/// +/// # Example +/// +/// ```rust +/// # #[macro_use] extern crate test_data_generation; extern crate rand; +/// # fn main() { +/// let rnd: f64 = random_percentage!(); +/// println!("Your random number is {}", rnd); +/// # } +/// ``` +#[macro_export] +macro_rules! random_percentage { + ( $( $x:expr ),* ) => {{ + use rand::{thread_rng, Rng}; + + let mut rng = thread_rng(); + + rng.gen_range::(0 as f64, 100 as f64) + }}; +} + +/// This macro generates a random number for a given range. +/// Returns a u32. +/// +/// # Arguments +/// +/// * `a: u32` - The lowest number of the range to use for the random number.
+/// * `b: u32` - The highest number of the range to use for the random number.
+/// +/// # Example +/// +/// ```rust +/// # #[macro_use] extern crate test_data_generation; extern crate rand; +/// # fn main() { +/// let rnd: u32 = random_between!(0, 100); +/// println!("Your random number is {}", rnd); +/// # } +/// ``` +#[macro_export] +macro_rules! random_between { + ($a:expr, $b:expr) => {{ + use rand::{thread_rng, Rng}; + + let mut rng = thread_rng(); + let nbr = rng.gen_range::($a as u32, $b as u32); + + nbr + }}; +} + +/// This function calculates the percent difference between 2 strings. +/// +/// # Arguments +/// +/// * `control: &String` - The string to compare against. This would be the real data from the data sample.
+/// * `experiment: &String` - The string to compare. This would be the generated data for which you want to find the percent difference.
+/// +/// # Example +/// +/// ```rust +/// # #[macro_use] extern crate test_data_generation; extern crate levenshtein; +/// +/// # fn main() { +/// assert_eq!(realistic_test!("kitten", "sitting"), 76.92307692307692 as f64); +/// # } +/// +#[macro_export] +macro_rules! realistic_test { + ( $c:expr, $e:expr ) => {{ + let ld: f64 = levenshtein_distance!($c, $e) as f64; + let total: f64 = $c.len() as f64 + $e.len() as f64; + let diff: f64 = total - ld; + (1 as f64 - ((total - diff) / total)) * 100 as f64 + }}; +} diff --git a/src/shared.rs b/src/shared.rs index 1fe5fc9..2e239e7 100644 --- a/src/shared.rs +++ b/src/shared.rs @@ -1,5 +1,5 @@ -use std::mem; use csv::Reader; +use std::mem; /// This function converts a String to a &'static str
/// @@ -29,27 +29,27 @@ pub fn string_to_static_str(s: String) -> &'static str { pub trait CsvManipulator { /// This function parses all the rows and splits the columns into separate Vectors - /// + /// /// # Arguments /// * `rdr: Reader<&[u8]>` - The csv::Reader that has read the csv file and is ready to process the data.
/// /// ```rust /// extern crate test_data_generation; /// extern crate csv; - /// + /// /// use test_data_generation::shared::CsvManipulator; /// use csv::Reader; - /// + /// /// fn main() { /// struct CsvMngr {} /// impl CsvManipulator for CsvMngr {} /// /// let mut data = String::from(""); - /// data.push_str("\"firstname\",\"lastname\"\n"); - /// data.push_str("\"Aaron\",\"Aaberg\"\n"); - /// data.push_str("\"Aaron\",\"Aaby\"\n"); - /// data.push_str("\"Abbey\",\"Aadland\"\n"); - /// data.push_str("\"Abbie\",\"Aagaard\"\n"); + /// data.push_str("\"firstname\",\"lastname\"\n"); + /// data.push_str("\"Aaron\",\"Aaberg\"\n"); + /// data.push_str("\"Aaron\",\"Aaby\"\n"); + /// data.push_str("\"Abbey\",\"Aadland\"\n"); + /// data.push_str("\"Abbie\",\"Aagaard\"\n"); /// data.push_str("\"Abby\",\"Aakre\""); /// /// let rdr: Reader<&[u8]> = csv::ReaderBuilder::new() @@ -68,7 +68,7 @@ pub trait CsvManipulator { /// ``` fn read_as_columns(mut rdr: Reader<&[u8]>) -> Vec> { let mut columns = Vec::new(); - + for result in rdr.records() { let record = result.expect("a CSV record"); @@ -97,36 +97,36 @@ mod tests { impl CsvManipulator for XTest {} #[test] - fn test_read_as_columns(){ + fn test_read_as_columns() { let mut data = String::from(""); - data.push_str("\"firstname\",\"lastname\"\n"); - data.push_str("\"Aaron\",\"Aaberg\"\n"); - data.push_str("\"Aaron\",\"Aaby\"\n"); - data.push_str("\"Abbey\",\"Aadland\"\n"); - data.push_str("\"Abbie\",\"Aagaard\"\n"); + data.push_str("\"firstname\",\"lastname\"\n"); + data.push_str("\"Aaron\",\"Aaberg\"\n"); + data.push_str("\"Aaron\",\"Aaby\"\n"); + data.push_str("\"Abbey\",\"Aadland\"\n"); + data.push_str("\"Abbie\",\"Aagaard\"\n"); data.push_str("\"Abby\",\"Aakre\""); - + let rdr: Reader<&[u8]> = csv::ReaderBuilder::new() - .has_headers(true) - .quote(b'"') - .double_quote(true) - .delimiter(b',') - .from_reader(data.as_bytes()); + .has_headers(true) + .quote(b'"') + .double_quote(true) + .delimiter(b',') + .from_reader(data.as_bytes()); let columns = XTest::read_as_columns(rdr); - let column0 = vec!("Aaron", "Aaron", "Abbey", "Abbie", "Abby"); - let column1 = vec!("Aaberg", "Aaby", "Aadland", "Aagaard", "Aakre"); - + let column0 = vec!["Aaron", "Aaron", "Abbey", "Abbie", "Abby"]; + let column1 = vec!["Aaberg", "Aaby", "Aadland", "Aagaard", "Aakre"]; + assert_eq!(columns[0], column0); assert_eq!(columns[1], column1); } #[test] // ensure the conversion of String to &'static str - fn test_to_static_str(){ + fn test_to_static_str() { let static_str: &'static str = "Hello World"; let my_string = String::from("Hello World"); - let my_static_str = string_to_static_str(my_string); + let my_static_str = string_to_static_str(my_string); assert_eq!(static_str, my_static_str); } diff --git a/tests/engine.rs b/tests/engine.rs index 1767e12..e43bcc2 100644 --- a/tests/engine.rs +++ b/tests/engine.rs @@ -2,18 +2,24 @@ extern crate test_data_generation; #[cfg(test)] mod tests { - use test_data_generation::engine::{Engine}; + use test_data_generation::engine::Engine; - struct Xtest{} - impl Engine for Xtest{} + struct Xtest {} + impl Engine for Xtest {} #[test] - fn test_pattern_definition_analyze_multithread(){ - let words = vec!("word-one".to_string(),"word-two".to_string(),"word-three".to_string(),"word-four".to_string(),"word-five".to_string()); + fn test_pattern_definition_analyze_multithread() { + let words = vec![ + "word-one".to_string(), + "word-two".to_string(), + "word-three".to_string(), + "word-four".to_string(), + "word-five".to_string(), + ]; let results = Xtest::analyze_entities(words); println!("{:?}", results); assert_eq!(results.len(), 5); - } -} \ No newline at end of file + } +} diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 8c5ae15..1c2eaf3 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -1,34 +1,40 @@ -#[macro_use] -extern crate log; -extern crate test_data_generation; -extern crate log4rs; - -#[cfg(test)] -mod tests { - use log4rs; - use test_data_generation::data_sample_parser::DataSampleParser; - - #[test] - // ensure that the crate (library) can be used in a executable - fn crate_integration_test(){ - // setup logging - log4rs::init_file("./tests/config/log4rs.yaml",Default::default()).unwrap(); - info!("Logging enabled..."); - - // start up a Data Sample Parser - let mut dsp = DataSampleParser::new_with(&String::from("./tests/config/tdg.yaml")); - - info!("Demo ..."); - println!("generate date:{}", dsp.demo_date()); - println!("generate person:{}", dsp.demo_person_name()); - - info!("Analyzing CSV file ..."); - println!("reading csv file: {}", dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap()); - - info!("Generating a first name based on the CSV file ..."); - println!("Generated data for first name {}",dsp.generate_by_field_name("firstname".to_string())); - - println!("Generated data record: {:?}",dsp.generate_record()); - - } -} +#[macro_use] +extern crate log; +extern crate log4rs; +extern crate test_data_generation; + +#[cfg(test)] +mod tests { + use log4rs; + use test_data_generation::data_sample_parser::DataSampleParser; + + #[test] + // ensure that the crate (library) can be used in a executable + fn crate_integration_test() { + // setup logging + log4rs::init_file("./tests/config/log4rs.yaml", Default::default()).unwrap(); + info!("Logging enabled..."); + + // start up a Data Sample Parser + let mut dsp = DataSampleParser::new_with(&String::from("./tests/config/tdg.yaml")); + + info!("Demo ..."); + println!("generate date:{}", dsp.demo_date()); + println!("generate person:{}", dsp.demo_person_name()); + + info!("Analyzing CSV file ..."); + println!( + "reading csv file: {}", + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + .unwrap() + ); + + info!("Generating a first name based on the CSV file ..."); + println!( + "Generated data for first name {}", + dsp.generate_by_field_name("firstname".to_string()) + ); + + println!("Generated data record: {:?}", dsp.generate_record()); + } +} diff --git a/tests/performance_tests.rs b/tests/performance_tests.rs index ef68eea..fb48ecd 100644 --- a/tests/performance_tests.rs +++ b/tests/performance_tests.rs @@ -4,41 +4,51 @@ use test_data_generation::data_sample_parser; #[cfg(test)] mod tests { - use crate::data_sample_parser::DataSampleParser; - use test_data_generation::engine::{Engine}; - use std::time::{Instant}; - - struct Xtest{} - impl Engine for Xtest{} - - #[ignore] - #[test] - // Performance Test - fn analyzing_word(){ - let now = Instant::now(); - let words = vec!("word-one".to_string(),"word-two".to_string(),"word-three".to_string(),"word-four".to_string(),"word-five".to_string()); + use crate::data_sample_parser::DataSampleParser; + use std::time::Instant; + use test_data_generation::engine::Engine; + + struct Xtest {} + impl Engine for Xtest {} + + #[ignore] + #[test] + // Performance Test + fn analyzing_word() { + let now = Instant::now(); + let words = vec![ + "word-one".to_string(), + "word-two".to_string(), + "word-three".to_string(), + "word-four".to_string(), + "word-five".to_string(), + ]; let _results = Xtest::analyze_entities(words); - let d = now.elapsed().subsec_millis(); + let d = now.elapsed().subsec_millis(); - // should run in less than 10 millisecond - if d > 10 { - panic!("Failed: The execution time took {:?} milliseconds.", d); + // should run in less than 10 millisecond + if d > 10 { + panic!("Failed: The execution time took {:?} milliseconds.", d); } } - #[ignore] - #[test] + #[ignore] + #[test] // ensure DataSampleParser can analyze a csv formatted file - fn analyzing_csv_file_15k(){ - let mut dsp = DataSampleParser::new(); - let now = Instant::now(); - - dsp.analyze_csv_file(&String::from("./tests/samples/sample-names.csv")).unwrap(); - - if now.elapsed().as_secs() > 60 { - panic!("Failed: The execution time took {:?} seconds.", now.elapsed().as_secs()); - } + fn analyzing_csv_file_15k() { + let mut dsp = DataSampleParser::new(); + let now = Instant::now(); + + dsp.analyze_csv_file(&String::from("./tests/samples/sample-names.csv")) + .unwrap(); + + if now.elapsed().as_secs() > 60 { + panic!( + "Failed: The execution time took {:?} seconds.", + now.elapsed().as_secs() + ); + } } } diff --git a/tests/samples/sample-01-dsp.json b/tests/samples/sample-01-dsp.json index 32ef2e4..ea1a8a4 100644 --- a/tests/samples/sample-01-dsp.json +++ b/tests/samples/sample-01-dsp.json @@ -1 +1 @@ -{"issues":false,"cfg":null,"profiles":[["firstname",{"id":"firstname","patterns":{"Vccc":1,"Vccvc":1,"Vccvv":1,"Vvcvc":2},"pattern_total":5,"pattern_keys":["Vccc","Vccvc","Vccvv","Vvcvc"],"pattern_vals":[1,1,1,2],"pattern_percentages":[["Vvcvc",40.0],["Vccc",20.0],["Vccvc",20.0],["Vccvv",20.0]],"pattern_ranks":[["Vvcvc",40.0],["Vccc",60.0],["Vccvc",80.0],["Vccvv",100.0]],"sizes":{"4":1,"5":4},"size_total":5,"size_ranks":[[5,80.0],[4,100.0]],"processors":4,"facts":[[{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"n","prior_key":"o","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"e","prior_key":"i","next_key":null,"pattern_placeholder":"v","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"n","prior_key":"o","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"y","prior_key":"e","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4}],[{"key":"a","prior_key":"A","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1},{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1}],[{"key":"r","prior_key":"a","next_key":"o","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"b","next_key":"y","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"b","next_key":"i","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"r","prior_key":"a","next_key":"o","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"b","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2}],[{"key":"o","prior_key":"r","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"y","prior_key":"b","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":3},{"key":"i","prior_key":"b","next_key":"e","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"o","prior_key":"r","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"e","prior_key":"b","next_key":"y","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3}]]}],["lastname",{"id":"lastname","patterns":{"Vvcc":1,"Vvccv":1,"Vvccvcc":1,"Vvcvcc":1,"Vvcvvcc":1},"pattern_total":5,"pattern_keys":["Vvcc","Vvccv","Vvccvcc","Vvcvcc","Vvcvvcc"],"pattern_vals":[1,1,1,1,1],"pattern_percentages":[["Vvcc",20.0],["Vvccv",20.0],["Vvccvcc",20.0],["Vvcvcc",20.0],["Vvcvvcc",20.0]],"pattern_ranks":[["Vvcc",20.0],["Vvccv",40.0],["Vvccvcc",60.0],["Vvcvcc",80.0],["Vvcvvcc",100.0]],"sizes":{"4":1,"5":1,"6":1,"7":2},"size_total":5,"size_ranks":[[7,40.0],[4,60.0],[5,80.0],[6,100.0]],"processors":4,"facts":[[{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"a","prior_key":"a","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"r","prior_key":"e","next_key":"g","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"e","prior_key":"r","next_key":null,"pattern_placeholder":"v","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"a","prior_key":"l","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":4}],[{"key":"a","prior_key":"A","next_key":"g","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"r","prior_key":"a","next_key":"d","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":5},{"key":"a","prior_key":"A","next_key":"b","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"g","prior_key":"r","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":5},{"key":"a","prior_key":"A","next_key":"k","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"b","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"d","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"n","prior_key":"a","next_key":"d","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":5}],[{"key":"g","prior_key":"a","next_key":"a","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"r","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":6},{"key":"b","prior_key":"a","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"k","prior_key":"a","next_key":"r","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"a","next_key":"y","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"a","next_key":"l","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"n","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":6}],[{"key":"a","prior_key":"g","next_key":"a","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"e","prior_key":"b","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"r","prior_key":"k","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":3},{"key":"y","prior_key":"b","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":3},{"key":"l","prior_key":"d","next_key":"a","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":3}]]}]]} \ No newline at end of file +{"issues":false,"cfg":null,"profiles":[["firstname",{"id":"firstname","patterns":{"Vccc":1,"Vccvc":1,"Vccvv":1,"Vvcvc":2},"pattern_total":5,"pattern_keys":["Vccc","Vccvc","Vccvv","Vvcvc"],"pattern_vals":[1,1,1,2],"pattern_percentages":[["Vvcvc",40.0],["Vccc",20.0],["Vccvc",20.0],["Vccvv",20.0]],"pattern_ranks":[["Vvcvc",40.0],["Vccc",60.0],["Vccvc",80.0],["Vccvv",100.0]],"sizes":{"4":1,"5":4},"size_total":5,"size_ranks":[[5,80.0],[4,100.0]],"processors":4,"facts":[[{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"y","prior_key":"e","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"e","prior_key":"i","next_key":null,"pattern_placeholder":"v","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"n","prior_key":"o","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"n","prior_key":"o","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":4},{"key":"A","prior_key":null,"next_key":"b","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0}],[{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1},{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"b","prior_key":"A","next_key":"b","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":1}],[{"key":"b","prior_key":"b","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"b","next_key":"i","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"r","prior_key":"a","next_key":"o","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"r","prior_key":"a","next_key":"o","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"b","prior_key":"b","next_key":"y","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2}],[{"key":"e","prior_key":"b","next_key":"y","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"i","prior_key":"b","next_key":"e","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"o","prior_key":"r","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"o","prior_key":"r","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"y","prior_key":"b","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":3}]]}],["lastname",{"id":"lastname","patterns":{"Vvcc":1,"Vvccv":1,"Vvccvcc":1,"Vvcvcc":1,"Vvcvvcc":1},"pattern_total":5,"pattern_keys":["Vvcc","Vvccv","Vvccvcc","Vvcvcc","Vvcvvcc"],"pattern_vals":[1,1,1,1,1],"pattern_percentages":[["Vvcc",20.0],["Vvccv",20.0],["Vvccvcc",20.0],["Vvcvcc",20.0],["Vvcvvcc",20.0]],"pattern_ranks":[["Vvcc",20.0],["Vvccv",40.0],["Vvccvcc",60.0],["Vvcvcc",80.0],["Vvcvvcc",100.0]],"sizes":{"4":1,"5":1,"6":1,"7":2},"size_total":5,"size_ranks":[[7,40.0],[4,60.0],[5,80.0],[6,100.0]],"processors":4,"facts":[[{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"a","prior_key":"l","next_key":"n","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"a","prior_key":"a","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"r","prior_key":"e","next_key":"g","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":4},{"key":"A","prior_key":null,"next_key":"a","pattern_placeholder":"V","starts_with":1,"ends_with":0,"index_offset":0},{"key":"e","prior_key":"r","next_key":null,"pattern_placeholder":"v","starts_with":0,"ends_with":1,"index_offset":4}],[{"key":"a","prior_key":"A","next_key":"b","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"a","prior_key":"A","next_key":"d","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"n","prior_key":"a","next_key":"d","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":5},{"key":"a","prior_key":"A","next_key":"g","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"r","prior_key":"a","next_key":"d","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":5},{"key":"a","prior_key":"A","next_key":"b","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1},{"key":"g","prior_key":"r","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":5},{"key":"a","prior_key":"A","next_key":"k","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":1}],[{"key":"b","prior_key":"a","next_key":"y","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"a","next_key":"l","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"n","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":6},{"key":"g","prior_key":"a","next_key":"a","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"d","prior_key":"r","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":6},{"key":"b","prior_key":"a","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2},{"key":"k","prior_key":"a","next_key":"r","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":2}],[{"key":"y","prior_key":"b","next_key":null,"pattern_placeholder":"c","starts_with":0,"ends_with":1,"index_offset":3},{"key":"l","prior_key":"d","next_key":"a","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":3},{"key":"a","prior_key":"g","next_key":"a","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"e","prior_key":"b","next_key":"r","pattern_placeholder":"v","starts_with":0,"ends_with":0,"index_offset":3},{"key":"r","prior_key":"k","next_key":"e","pattern_placeholder":"c","starts_with":0,"ends_with":0,"index_offset":3}]]}]]} \ No newline at end of file From 012057d653c860dc741cb3b913fe9f1b7c3964a2 Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 11:49:12 -0500 Subject: [PATCH 05/13] Issue #90 --- src/engine/mod.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/engine/mod.rs b/src/engine/mod.rs index dcd7133..d3d8af4 100644 --- a/src/engine/mod.rs +++ b/src/engine/mod.rs @@ -501,10 +501,12 @@ pub trait Engine { for result in results { match profile.apply_facts(result.0, result.1) { Ok(_) => {} - Err(e) => return Err(format!( + Err(e) => { + return Err(format!( "Error: Couldn't apply the Pattern and Facts to the Profile. Error Message: {}", e.to_string() - )), + )) + } } } From 0acb22ddd59657efabf79f5c537c7b0575e79557 Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 11:50:57 -0500 Subject: [PATCH 06/13] Updated README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bbf6461..f98978a 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![Coverage Status](https://coveralls.io/repos/github/dsietz/test-data-generation/badge.svg?branch=master)](https://coveralls.io/github/dsietz/test-data-generation?branch=master) [![Docs.rs](https://docs.rs/test-data-generation/badge.svg)](https://docs.rs/test-data-generation) -Linux: [![Build Status](https://travis-ci.org/dsietz/test-data-generation.svg?branch=master)](https://travis-ci.org/dsietz/test-data-generation) +Linux: [![Build Status](https://github.com/dsietz/test-data-generation/actions/workflows/master.yaml/badge.svg)](https://github.com/dsietz/test-data-generation/actions/workflows/master.yaml) Windows: [![Build status](https://ci.appveyor.com/api/projects/status/uw58v5t8ynwj8s8o/branch/master?svg=true)](https://ci.appveyor.com/project/dsietz/test-data-generation/branch/master) ## [Fast test data generation!](#head1234) From 60f04089f6786de6ce927049c3f083c72674ac45 Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 12:23:29 -0500 Subject: [PATCH 07/13] Trying to get code coverage to work --- .github/workflows/development.yaml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index de5684e..2886abf 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -92,13 +92,15 @@ jobs: uses: actions-rs/cargo@v1 with: command: clean + # - name: Gather coverage data + # uses: actions-rs/tarpaulin@v0.1 + # with: + # version: '0.15.0' + # out-type: 'Lcov' + # run-types: Tests + # args: '-- --test-threads 1' - name: Gather coverage data - uses: actions-rs/tarpaulin@v0.1 - with: - version: '0.15.0' - out-type: 'Lcov' - run-types: Tests - args: '-- --test-threads 1' + uses: docker://lpenz/ghaction-rust-coverage:0.2 - name: Coveralls upload uses: coverallsapp/github-action@master with: From b065abcb80dedaf55adb521e795e251c73db3dc8 Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 12:31:28 -0500 Subject: [PATCH 08/13] trying to get codecoverag to work --- .github/workflows/development.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index 2886abf..09b2e1e 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -92,7 +92,7 @@ jobs: uses: actions-rs/cargo@v1 with: command: clean - # - name: Gather coverage data + # - name: Gather coverage data # Error due to https://github.com/xd009642/tarpaulin/issues/461 # uses: actions-rs/tarpaulin@v0.1 # with: # version: '0.15.0' From a25a75e50553ffeb30ccd8fb821ab35a29af726b Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 12:43:53 -0500 Subject: [PATCH 09/13] Fixed warnings and README --- .github/workflows/master.yaml | 15 +++++++++------ README.md | 6 +++--- src/data_sample_parser.rs | 13 ++++++------- src/engine/mod.rs | 4 ++-- 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/.github/workflows/master.yaml b/.github/workflows/master.yaml index 1b35556..7aae13e 100644 --- a/.github/workflows/master.yaml +++ b/.github/workflows/master.yaml @@ -92,17 +92,20 @@ jobs: uses: actions-rs/cargo@v1 with: command: clean + # - name: Gather coverage data # Error due to https://github.com/xd009642/tarpaulin/issues/461 + # uses: actions-rs/tarpaulin@v0.1 + # with: + # version: '0.15.0' + # out-type: 'Lcov' + # run-types: Tests + # args: '-- --test-threads 1' - name: Gather coverage data - uses: actions-rs/tarpaulin@v0.1 - with: - version: '0.15.0' - out-type: 'Lcov' - run-types: Tests - args: '-- --test-threads 1' + uses: docker://lpenz/ghaction-rust-coverage:0.2 - name: Coveralls upload uses: coverallsapp/github-action@master with: github-token: ${{ secrets.GITHUB_TOKEN }} + #coveralls-token: ${{ secrets.COVERALLS_TOKEN }} path-to-lcov: lcov.info parallel: true diff --git a/README.md b/README.md index f98978a..41a1574 100644 --- a/README.md +++ b/README.md @@ -41,9 +41,9 @@ or production environment (option #1 above) ## What's New -Here's whats new in 0.2.1: -+ [Fix for issue #88](https://github.com/dsietz/test-data-generation/issues/88) -+ Added a new Demo 03 `cargo run --example 03_demo` +Here's whats new in 0.3.0: ++ [Fix for issue #90](https://github.com/dsietz/test-data-generation/issues/90) ++ ## About diff --git a/src/data_sample_parser.rs b/src/data_sample_parser.rs index 7cbc6b7..bd5aa18 100644 --- a/src/data_sample_parser.rs +++ b/src/data_sample_parser.rs @@ -81,7 +81,6 @@ use crate::shared::CsvManipulator; use crate::Profile; use csv; use indexmap::IndexMap; -use serde_json::map::Map; use std::fs::File; use std::io; use std::io::prelude::*; @@ -90,7 +89,7 @@ use std::result::Result; //use csv::StringRecord; use csv::WriterBuilder; use serde_json; -use serde_json::{json, Value}; +use serde_json::Value; use std::error::Error; use std::sync::mpsc; @@ -219,7 +218,7 @@ impl DataSampleParser { }; // Support backwards compatibility for DSP saved using prior versions - let mut dsp: Value = serde_json::from_str(&serialized).unwrap(); + let dsp: Value = serde_json::from_str(&serialized).unwrap(); let prfils = dsp.get("profiles").unwrap(); match prfils.is_array() { @@ -236,7 +235,7 @@ impl DataSampleParser { } fn updgrade_to_latest_version(serialized: String) -> DataSampleParser { - let mut dsp: Value = serde_json::from_str(&serialized).unwrap(); + let dsp: Value = serde_json::from_str(&serialized).unwrap(); let prfils = dsp.get("profiles").unwrap(); let mut pm: ProfilesMap = ProfilesMap::new(); let issues = dsp.get("issues").unwrap().as_bool().unwrap(); @@ -260,7 +259,7 @@ impl DataSampleParser { } let mut rtn = match dsp.get("cfg").unwrap() { - Null => DataSampleParser::new(), + serde_json::Value::Null => DataSampleParser::new(), _ => DataSampleParser::new_with( &dsp.get("cfg") .unwrap() @@ -827,7 +826,7 @@ mod tests { #[test] // ensure a new Data Sample Parser can be created fn test_new() { - let dsp = DataSampleParser::new(); + let _dsp = DataSampleParser::new(); assert!(true); } @@ -835,7 +834,7 @@ mod tests { #[test] // ensure a new Data Sample Parser can be created with configurations fn test_new_with() { - let dsp = DataSampleParser::new_with(&String::from("./config/tdg.yaml")); + let _dsp = DataSampleParser::new_with(&String::from("./config/tdg.yaml")); assert!(true); } diff --git a/src/engine/mod.rs b/src/engine/mod.rs index d3d8af4..8dd02a0 100644 --- a/src/engine/mod.rs +++ b/src/engine/mod.rs @@ -366,12 +366,12 @@ impl PatternDefinition { // only if there is a next key if nk.is_some() { - &fact.set_next_key(nk.unwrap()); + let _ = &fact.set_next_key(nk.unwrap()); } // only if there is a prior key if pk.is_some() { - &fact.set_prior_key(pk.unwrap()); + let _ = &fact.set_prior_key(pk.unwrap()); } fact From 6b845e996518d09f0a5816e322b9cc916d2598b8 Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 12:51:33 -0500 Subject: [PATCH 10/13] trying to get audit to pass-through on fail --- .github/workflows/development.yaml | 1 + README.md | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/development.yaml b/.github/workflows/development.yaml index 09b2e1e..a0e34ea 100644 --- a/.github/workflows/development.yaml +++ b/.github/workflows/development.yaml @@ -77,6 +77,7 @@ jobs: - uses: actions-rs/audit-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} + continue-on-error: true coveralls-grcov: name: Code Coverage diff --git a/README.md b/README.md index 41a1574..a1f63d5 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,7 @@ or production environment (option #1 above) ## What's New Here's whats new in 0.3.0: +> WARNING - Upgrading to 0.3.0 can cause breakage when loading a Data Sample Parser that was saved with a prior version, (e.g.: 0.2.1). Every effort has been made to automatically convert to the latest version of the DSP object when loading from a saved dsp file, however, it is not guaranteed. + [Fix for issue #90](https://github.com/dsietz/test-data-generation/issues/90) + From 5afb468b11567dc03911afccff80d1427ca32ff1 Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 13:58:33 -0500 Subject: [PATCH 11/13] Issue #91 --- .github/workflows/master.yaml | 1 + .gitignore | 3 +- README.md | 2 +- examples/02_demo.rs | 2 +- src/data_sample_parser.rs | 96 +++++++++++++++++++++++------------ src/lib.rs | 9 ++-- tests/integration_test.rs | 2 +- tests/performance_tests.rs | 2 +- 8 files changed, 76 insertions(+), 41 deletions(-) diff --git a/.github/workflows/master.yaml b/.github/workflows/master.yaml index 7aae13e..5d4f7f3 100644 --- a/.github/workflows/master.yaml +++ b/.github/workflows/master.yaml @@ -77,6 +77,7 @@ jobs: - uses: actions-rs/audit-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} + continue-on-error: true coveralls-grcov: name: Code Coverage diff --git a/.gitignore b/.gitignore index 9f9e744..9966c67 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,5 @@ Cargo.lock *.orig # Generated file due to testing -/tests/samples/generated-01.csv \ No newline at end of file +/tests/samples/generated-01.csv +/tests/samples/generated-01b.csv \ No newline at end of file diff --git a/README.md b/README.md index a1f63d5..4fc94b7 100644 --- a/README.md +++ b/README.md @@ -42,8 +42,8 @@ or production environment (option #1 above) ## What's New Here's whats new in 0.3.0: -> WARNING - Upgrading to 0.3.0 can cause breakage when loading a Data Sample Parser that was saved with a prior version, (e.g.: 0.2.1). Every effort has been made to automatically convert to the latest version of the DSP object when loading from a saved dsp file, however, it is not guaranteed. + [Fix for issue #90](https://github.com/dsietz/test-data-generation/issues/90) + > Every effort has been made to automatically convert to the latest version of the DSP object when loading from a saved dsp file from a prior version, (e.g.: 0.2.1), however, it is not guaranteed. + ## About diff --git a/examples/02_demo.rs b/examples/02_demo.rs index d8f24bb..ce07af1 100644 --- a/examples/02_demo.rs +++ b/examples/02_demo.rs @@ -9,7 +9,7 @@ fn main() { // initalize a new DataSampelParser let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-names.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-names.csv"), None) .unwrap(); // generate some test data using the demo functions diff --git a/src/data_sample_parser.rs b/src/data_sample_parser.rs index bd5aa18..2653904 100644 --- a/src/data_sample_parser.rs +++ b/src/data_sample_parser.rs @@ -67,9 +67,10 @@ //! //! fn main() { //! let mut dsp = DataSampleParser::new(); -//! -//! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); -//! dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")).unwrap(); +//! +//! // Using the default delimiter (comma) +//! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(); +//! dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv"), None).unwrap(); //! } //! ``` //! @@ -96,7 +97,8 @@ use std::sync::mpsc; use std::sync::mpsc::{Receiver, Sender}; use std::thread; -// type ProfilesMap = BTreeMap; +const DELIMITER:u8 = b','; + type ProfilesMap = IndexMap; #[derive(Serialize, Deserialize, Debug)] @@ -350,6 +352,7 @@ impl DataSampleParser { /// # Arguments /// /// * `data: &String` - The textual content of a csv formatted sample data file.
+ /// * `delimiter: Option` - The delimiter to use, otherwise use the default.
/// /// # Example /// @@ -369,17 +372,18 @@ impl DataSampleParser { /// data.push_str("\"Abbie\",\"Aagaard\"\n"); /// data.push_str("\"Abby\",\"Aakre\""); /// - /// assert_eq!(dsp.analyze_csv_data(&data).unwrap(),1); + /// // Use the default delimiter (comma) + /// assert_eq!(dsp.analyze_csv_data(&data, None).unwrap(),1); /// } /// ``` - pub fn analyze_csv_data(&mut self, data: &String) -> Result { + pub fn analyze_csv_data(&mut self, data: &String, delimiter: Option) -> Result { debug!("Starting to analyzed the csv data {}", data); let mut rdr = csv::ReaderBuilder::new() .has_headers(true) .quote(b'"') .double_quote(true) - .delimiter(b',') + .delimiter(Self::else_default_delimiter(delimiter)) .from_reader(data.as_bytes()); //iterate through the headers @@ -426,6 +430,7 @@ impl DataSampleParser { /// # Arguments /// /// * `path: &String` - The full path name of the csv formatted sample data file.
+ /// * `delimiter: Option` - The delimiter to use, otherwise use the default.
/// /// # Example /// @@ -438,10 +443,11 @@ impl DataSampleParser { /// // initalize a new DataSampelParser /// let mut dsp = DataSampleParser::new(); /// - /// assert_eq!(dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(),1); + /// // Use the default delimiter (comma) + /// assert_eq!(dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(),1); /// } /// ``` - pub fn analyze_csv_file(&mut self, path: &String) -> Result { + pub fn analyze_csv_file(&mut self, path: &String, delimiter: Option) -> Result { info!("Starting to analyzed the csv file {}", path); let mut file = (File::open(path).map_err(|e| { @@ -457,7 +463,7 @@ impl DataSampleParser { }) .unwrap(); - self.analyze_csv_data(&data) + self.analyze_csv_data(&data, delimiter) } /// This function generates date as strings using the a `demo` profile @@ -539,6 +545,17 @@ impl DataSampleParser { profil.generate() } + fn else_default_delimiter(delimiter: Option) -> u8{ + match delimiter{ + Some(d) => { + return d; + }, + None => { + return DELIMITER; + } + } + } + /// This function returns a vector of header names /// /// # Example @@ -552,7 +569,7 @@ impl DataSampleParser { /// // initalize a new DataSampelParser /// let mut dsp = DataSampleParser::new(); /// - /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); + /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(); /// let headers = dsp.extract_headers(); /// /// assert_eq!(headers.len(), 2); @@ -584,7 +601,7 @@ impl DataSampleParser { /// // initalize a new DataSampelParser /// let mut dsp = DataSampleParser::new(); /// - /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); + /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(); /// println!("Generated data for first name {}",dsp.generate_by_field_name("firstname".to_string())); /// } /// ``` @@ -609,7 +626,7 @@ impl DataSampleParser { /// // initalize a new DataSampelParser /// let mut dsp = DataSampleParser::new(); /// - /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); + /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(); /// println!("Generated data record: {:?}",dsp.generate_record()); /// } /// ``` @@ -636,6 +653,7 @@ impl DataSampleParser { /// /// * `row_count: u32` - The number of rows to generate.
/// * `path: &String` - The full path name where to save the csv file.
+ /// * `delimiter: Option` - The delimiter to use, otherwise use the default.
/// /// # Example /// @@ -648,18 +666,18 @@ impl DataSampleParser { /// // initalize a new DataSampelParser /// let mut dsp = DataSampleParser::new(); /// - /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); - /// dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")).unwrap(); + /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(); + /// dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv"), None).unwrap(); /// } /// ``` - pub fn generate_csv(&mut self, row_count: u32, path: &String) -> Result<(), Box> { + pub fn generate_csv(&mut self, row_count: u32, path: &String, delimiter: Option) -> Result<(), Box> { info!("generating csv file {}", path); let mut wtr = (WriterBuilder::new() .has_headers(true) .quote(b'"') .double_quote(true) - .delimiter(b',') + .delimiter(Self::else_default_delimiter(delimiter)) .from_path(path) .map_err(|e| { error!("csv file {} couldn't be created!", path); @@ -782,7 +800,7 @@ impl DataSampleParser { /// fn main() { /// // analyze the dataset /// let mut dsp = DataSampleParser::new(); - /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-00.csv")).unwrap(); + /// dsp.analyze_csv_file(&String::from("./tests/samples/sample-00.csv"), None).unwrap(); /// /// assert_eq!(dsp.save(&String::from("./tests/samples/sample-00-dsp")).unwrap(), true); /// } @@ -875,7 +893,7 @@ mod tests { fn test_read_headers() { let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None) .unwrap(); let headers = dsp.extract_headers(); @@ -892,7 +910,7 @@ mod tests { expected.push("column-G"); let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-02.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-02.csv"), None) .unwrap(); let headers = dsp.extract_headers(); @@ -905,7 +923,7 @@ mod tests { let mut dsp = DataSampleParser::new(); assert_eq!( - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None) .unwrap(), 1 ); @@ -913,7 +931,7 @@ mod tests { #[test] // ensure DataSampleParser can analyze a csv formatted text - fn test_parse_csv_data() { + fn test_parse_csv_data_using_defaults() { let mut dsp = DataSampleParser::new(); let mut data = String::from(""); data.push_str("\"firstname\",\"lastname\"\n"); @@ -923,15 +941,29 @@ mod tests { data.push_str("\"Abbie\",\"Aagaard\"\n"); data.push_str("\"Abby\",\"Aakre\""); - assert_eq!(dsp.analyze_csv_data(&data).unwrap(), 1); + assert_eq!(dsp.analyze_csv_data(&data, None).unwrap(), 1); } + #[test] + // ensure DataSampleParser can analyze a csv formatted text + fn test_parse_csv_data() { + let mut dsp = DataSampleParser::new(); + let mut data = String::from(""); + data.push_str("\"firstname\"|\"lastname\"\n"); + data.push_str("\"Aaron\"|\"Aaberg\"\n"); + data.push_str("\"Aaron\"|\"Aaby\"\n"); + data.push_str("\"Abbey\"|\"Aadland\"\n"); + data.push_str("\"Abbie\"|\"Aagaard\"\n"); + data.push_str("\"Abby\"|\"Aakre\""); + + assert_eq!(dsp.analyze_csv_data(&data, Some(b'|')).unwrap(), 1); + } #[test] // ensure DataSampleParser can analyze a csv formatted file fn test_generate_field_from_csv_file() { let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None) .unwrap(); println!( "Generated data for first name {}", @@ -944,7 +976,7 @@ mod tests { fn test_generate_record_from_csv_file() { let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None) .unwrap(); assert_eq!(dsp.generate_record().len(), 2); } @@ -955,7 +987,7 @@ mod tests { let mut dsp = DataSampleParser::new(); assert_eq!( - dsp.analyze_csv_file(&String::from("./badpath/sample-01.csv")) + dsp.analyze_csv_file(&String::from("./badpath/sample-01.csv"), None) .is_err(), true ); @@ -965,7 +997,7 @@ mod tests { // ensure the DataSampleParser object can be saved to file fn test_save() { let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-00.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-00.csv"), None) .unwrap(); assert_eq!( @@ -1001,7 +1033,7 @@ mod tests { // demo test fn test_demo() { let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None) .unwrap(); println!( @@ -1018,7 +1050,7 @@ mod tests { fn test_extract_headers_from_sample() { let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None) .unwrap(); let headers = dsp.extract_headers(); @@ -1030,13 +1062,13 @@ mod tests { fn test_generate_csv_test_data_from_sample() { let mut dsp = DataSampleParser::new(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None) .unwrap(); - dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")) + dsp.generate_csv(100, &String::from("./tests/samples/generated-01b.csv"), Some(b'|')) .unwrap(); let generated_row_count = - match File::open(format!("{}", "./tests/samples/generated-01.csv")) { + match File::open(format!("{}", "./tests/samples/generated-01b.csv")) { Err(_e) => 0, Ok(f) => { let mut count = 0; diff --git a/src/lib.rs b/src/lib.rs index d8a7916..c31f0ad 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,7 +76,7 @@ //! //! fn main() { //! let mut dsp = DataSampleParser::new(); -//! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); +//! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(); //! //! println!("My new name is {} {}", dsp.generate_record()[0], dsp.generate_record()[1]); //! // My new name is Abbon Aady @@ -91,7 +91,7 @@ //! //! fn main() { //! let mut dsp = DataSampleParser::new(); -//! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); +//! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(); //! //! assert_eq!(dsp.save(&String::from("./tests/samples/sample-01-dsp")).unwrap(), true); //! } @@ -120,8 +120,9 @@ //! fn main() { //! let mut dsp = DataSampleParser::new(); //! -//! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")).unwrap(); -//! dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv")).unwrap(); +//! // Use the default delimiter (comma) +//! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(); +//! dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv"), None).unwrap(); //! } //! ``` #![crate_type = "lib"] diff --git a/tests/integration_test.rs b/tests/integration_test.rs index 1c2eaf3..7bdee30 100644 --- a/tests/integration_test.rs +++ b/tests/integration_test.rs @@ -25,7 +25,7 @@ mod tests { info!("Analyzing CSV file ..."); println!( "reading csv file: {}", - dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None) .unwrap() ); diff --git a/tests/performance_tests.rs b/tests/performance_tests.rs index fb48ecd..de886fa 100644 --- a/tests/performance_tests.rs +++ b/tests/performance_tests.rs @@ -41,7 +41,7 @@ mod tests { let mut dsp = DataSampleParser::new(); let now = Instant::now(); - dsp.analyze_csv_file(&String::from("./tests/samples/sample-names.csv")) + dsp.analyze_csv_file(&String::from("./tests/samples/sample-names.csv"), None) .unwrap(); if now.elapsed().as_secs() > 60 { From 1ffc142585dd829ea27cc0a547d3e0c93231f6b2 Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 14:00:16 -0500 Subject: [PATCH 12/13] Issue #91 --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4fc94b7..8ac6ac5 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,8 @@ or production environment (option #1 above) Here's whats new in 0.3.0: + [Fix for issue #90](https://github.com/dsietz/test-data-generation/issues/90) > Every effort has been made to automatically convert to the latest version of the DSP object when loading from a saved dsp file from a prior version, (e.g.: 0.2.1), however, it is not guaranteed. -+ ++ [Added issue #91](https://github.com/dsietz/test-data-generation/issues/91) + > Optional paramters for setting the delimiter when analyzing and generating csv files. ## About From 721cff1f2fef959f70a9cfd261cce6349707b663 Mon Sep 17 00:00:00 2001 From: dsietz Date: Sun, 14 Nov 2021 14:05:57 -0500 Subject: [PATCH 13/13] fixed formatting --- src/data_sample_parser.rs | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/src/data_sample_parser.rs b/src/data_sample_parser.rs index 2653904..7f31bd4 100644 --- a/src/data_sample_parser.rs +++ b/src/data_sample_parser.rs @@ -67,7 +67,7 @@ //! //! fn main() { //! let mut dsp = DataSampleParser::new(); -//! +//! //! // Using the default delimiter (comma) //! dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(); //! dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv"), None).unwrap(); @@ -97,7 +97,7 @@ use std::sync::mpsc; use std::sync::mpsc::{Receiver, Sender}; use std::thread; -const DELIMITER:u8 = b','; +const DELIMITER: u8 = b','; type ProfilesMap = IndexMap; @@ -376,7 +376,11 @@ impl DataSampleParser { /// assert_eq!(dsp.analyze_csv_data(&data, None).unwrap(),1); /// } /// ``` - pub fn analyze_csv_data(&mut self, data: &String, delimiter: Option) -> Result { + pub fn analyze_csv_data( + &mut self, + data: &String, + delimiter: Option, + ) -> Result { debug!("Starting to analyzed the csv data {}", data); let mut rdr = csv::ReaderBuilder::new() @@ -447,7 +451,11 @@ impl DataSampleParser { /// assert_eq!(dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None).unwrap(),1); /// } /// ``` - pub fn analyze_csv_file(&mut self, path: &String, delimiter: Option) -> Result { + pub fn analyze_csv_file( + &mut self, + path: &String, + delimiter: Option, + ) -> Result { info!("Starting to analyzed the csv file {}", path); let mut file = (File::open(path).map_err(|e| { @@ -545,11 +553,11 @@ impl DataSampleParser { profil.generate() } - fn else_default_delimiter(delimiter: Option) -> u8{ - match delimiter{ + fn else_default_delimiter(delimiter: Option) -> u8 { + match delimiter { Some(d) => { return d; - }, + } None => { return DELIMITER; } @@ -670,7 +678,12 @@ impl DataSampleParser { /// dsp.generate_csv(100, &String::from("./tests/samples/generated-01.csv"), None).unwrap(); /// } /// ``` - pub fn generate_csv(&mut self, row_count: u32, path: &String, delimiter: Option) -> Result<(), Box> { + pub fn generate_csv( + &mut self, + row_count: u32, + path: &String, + delimiter: Option, + ) -> Result<(), Box> { info!("generating csv file {}", path); let mut wtr = (WriterBuilder::new() @@ -1064,8 +1077,12 @@ mod tests { dsp.analyze_csv_file(&String::from("./tests/samples/sample-01.csv"), None) .unwrap(); - dsp.generate_csv(100, &String::from("./tests/samples/generated-01b.csv"), Some(b'|')) - .unwrap(); + dsp.generate_csv( + 100, + &String::from("./tests/samples/generated-01b.csv"), + Some(b'|'), + ) + .unwrap(); let generated_row_count = match File::open(format!("{}", "./tests/samples/generated-01b.csv")) {