Skip to content

Commit

Permalink
Merge pull request #16 from VolumeGraphics/multiple_include_exclude
Browse files Browse the repository at this point in the history
Multiple include exclude
  • Loading branch information
ChrisRega committed Dec 12, 2022
2 parents f2d1bcb + 34f39e1 commit e9258d2
Show file tree
Hide file tree
Showing 8 changed files with 104 additions and 48 deletions.
4 changes: 2 additions & 2 deletions Cargo.toml
Expand Up @@ -4,7 +4,7 @@ description = "A flexible folder comparison tool / crate with html reporting."
repository = "https://github.com/VolumeGraphics/havocompare"
homepage = "https://github.com/VolumeGraphics/havocompare"
documentation = "https://docs.rs/havocompare"
version = "0.1.4-RC3"
version = "0.1.4"
edition = "2021"
license = "MIT"
authors = ["Volume Graphics GmbH"]
Expand Down Expand Up @@ -40,6 +40,6 @@ vg_errortools = "0.1.0"
ansi_term = "0.12"

[dev-dependencies]
env_logger = "0.9"
env_logger = "0.10"
tracing = {version = "0.1", default-features = false}
tracing-subscriber = {version = "0.3", default-features = false, features = ["env-filter", "fmt"]}
39 changes: 26 additions & 13 deletions README.md
Expand Up @@ -21,8 +21,12 @@ See the following example `config.yaml`:
```yaml
rules:
- name: "Numerical results csv"
pattern_include: "**/export_*.csv"
pattern_exclude: "**/export_1337.csv"
# you can have multiple includes and excludes
pattern_include:
- "**/export_*.csv"
# excludes are optional
pattern_exclude:
- "**/export_1337.csv"
CSV:
comparison_modes:
- Relative: 0.1
Expand Down Expand Up @@ -68,16 +72,18 @@ See the following example with all optional parameters set:
```yaml
rules:
- name: "CSV - Demo all options"
# what files to include
pattern_include: "**/*.csv"
# optional: of all included files, remove the ones matching the exclude pattern
pattern_exclude: "**/ignored.csv"
# what files to include - use as many as make sense to reduce duplication in your rules
pattern_include:
- "**/*.csv"
# optional: of all included files, remove the ones matching any exclude pattern
pattern_exclude:
- "**/ignored.csv"
CSV:
# delimiters are optional, if not given, they will be auto-detected.
# auto-detection allows different delimiters for nominal and actual
decimal_separator: '.'
field_delimiter: ';'
# can have Absolute or Relative
# can have Absolute or Relative or both
comparison_modes:
- Absolute: 1.0
- Relative: 0.1
Expand Down Expand Up @@ -106,7 +112,8 @@ Only a threshold can be specified:
```yaml
rules:
- name: "JPG comparison"
pattern_include: "**/*.jpg"
pattern_include:
- "**/*.jpg"
# exclude can of course also be specified!
Image:
# threshold is between 0.0 for total difference, 0.5 for very dissimilar and 1.0 for perfect match
Expand All @@ -120,8 +127,10 @@ crate is used. You can ignore single lines which you know are different by speci

```yaml
- name: "HTML-Compare strict"
pattern_exclude: "**/*_changed.html"
pattern_include: "**/*.html"
pattern_exclude:
- "**/*_changed.html"
pattern_include:
- "**/*.html"
PlainText:
# Normalized Damerau-Levenshtein distance
threshold: 1.0
Expand All @@ -137,8 +146,10 @@ For PDF text comparison the text will be extracted and written to temporary file

```yaml
- name: "PDF-Text-Compare"
pattern_exclude: "**/*_changed.pdf"
pattern_include: "**/*.pdf"
pattern_exclude:
- "**/*_changed.pdf"
pattern_include:
- "**/*.pdf"
PDFText:
# Normalized Damerau-Levenshtein distance
threshold: 1.0
Expand All @@ -156,7 +167,8 @@ Currently we only support SHA-256 but more checks can be added easily.

```yaml
- name: "Hash comparison strict"
pattern_exclude: "**/*.bin"
pattern_exclude:
- "**/*.bin"
Hash:
# Currently we only have Sha256
function: Sha256
Expand All @@ -166,6 +178,7 @@ Currently we only support SHA-256 but more checks can be added easily.
## Changelog

### 0.1.4
- Add multiple includes and excludes - warning, this will break yamls from 0.1.3 and earlier
- Remove all `unwrap` and `expect` in the library code in favor of correct error propagation
- Add preprocessing options for CSV files
- Refined readme.md
Expand Down
14 changes: 9 additions & 5 deletions config_scheme.json
Expand Up @@ -304,20 +304,24 @@
],
"required": [
"name",
"pattern_exclude",
"pattern_include"
],
"properties": {
"name": {
"type": "string"
},
"pattern_exclude": {
"type": [
"string",
"null"
]
"type": "array",
"items": {
"type": "string"
}
},
"pattern_include": {
"type": "string"
"type": "array",
"items": {
"type": "string"
}
}
}
}
Expand Down
63 changes: 46 additions & 17 deletions src/lib.rs
Expand Up @@ -77,26 +77,24 @@ struct ConfigurationFile {
// A single named comparison rule.
// It carries the glob patterns that select files (includes, plus optional
// excludes) and the comparison mode to apply to them.
#[derive(Debug, Deserialize, Serialize, JsonSchema)]
struct Rule {
name: String,
pattern_include: String,
pattern_exclude: Option<String>,
pattern_include: Vec<String>,
// `Option`: a rule may omit its excludes entirely.
pattern_exclude: Option<Vec<String>>,
// Flattened: the comparison mode is (de)serialized on the same level as the
// rule's own fields instead of under a `file_type` key.
#[serde(flatten)]
file_type: ComparisonMode,
}

/// Globs below `path` and collects the matching paths.
///
/// Each pattern is joined onto `path` before globbing, so patterns are
/// interpreted relative to that directory.
///
/// # Errors
/// Returns the `glob::PatternError` raised by `glob::glob` for a pattern it rejects.
fn glob_files(
path: impl AsRef<Path>,
pattern: Option<&str>,
patterns: &[impl AsRef<str>],
) -> Result<Vec<PathBuf>, glob::PatternError> {
if let Some(pattern) = pattern {
let path_prefix = path.as_ref().join(pattern);
let mut files = Vec::new();
for pattern in patterns {
let path_prefix = path.as_ref().join(pattern.as_ref());
// Lossy conversion: non-UTF-8 parts of the joined path are replaced, not rejected.
let path_pattern = path_prefix.to_string_lossy();
debug!("Globbing: {}", path_pattern);
// Entries the glob iterator yields as `Err` are dropped silently.
Ok(glob::glob(path_pattern.as_ref())?
.filter_map(|c| c.ok())
.collect())
} else {
Ok(Vec::new())
// Entries the glob iterator yields as `Err` are dropped silently.
files.extend(glob::glob(path_pattern.as_ref())?.filter_map(|p| p.ok()));
}
Ok(files)
}

fn filter_exclude(paths: Vec<PathBuf>, excludes: Vec<PathBuf>) -> Vec<PathBuf> {
Expand Down Expand Up @@ -158,6 +156,16 @@ fn process_file(
compare_result
}

/// Collects the files below `path` selected by the include patterns, minus the excluded ones.
///
/// Both pattern lists are globbed relative to `path` via `glob_files`; the include
/// matches and the exclude matches are then handed to `filter_exclude`, whose
/// result is returned.
///
/// # Errors
/// Propagates the `glob::PatternError` from `glob_files`. The exclude patterns are
/// globbed first, so an error in them is reported before one in the include patterns.
fn get_files(
    path: impl AsRef<Path>,
    patterns_include: &[impl AsRef<str>],
    patterns_exclude: &[impl AsRef<str>],
) -> Result<Vec<PathBuf>, glob::PatternError> {
    let root: &Path = path.as_ref();
    let excluded = glob_files(root, patterns_exclude)?;
    let included = glob_files(root, patterns_include)?;
    Ok(filter_exclude(included, excluded))
}

fn process_rule(
nominal: impl AsRef<Path>,
actual: impl AsRef<Path>,
Expand All @@ -180,13 +188,11 @@ fn process_rule(
return Ok(false);
}

let nominal_files_exclude = glob_files(nominal.as_ref(), rule.pattern_exclude.as_deref())?;
let nominal_paths: Vec<_> = glob_files(nominal.as_ref(), Some(rule.pattern_include.as_str()))?;
let nominal_cleaned_paths = filter_exclude(nominal_paths, nominal_files_exclude);
let exclude_patterns = rule.pattern_exclude.as_deref().unwrap_or_default();

let actual_files_exclude = glob_files(actual.as_ref(), rule.pattern_exclude.as_deref())?;
let actual_paths: Vec<_> = glob_files(actual.as_ref(), Some(rule.pattern_include.as_str()))?;
let actual_cleaned_paths = filter_exclude(actual_paths, actual_files_exclude);
let nominal_cleaned_paths =
get_files(nominal.as_ref(), &rule.pattern_include, exclude_patterns)?;
let actual_cleaned_paths = get_files(actual.as_ref(), &rule.pattern_include, exclude_patterns)?;

info!(
"Found {} files matching includes in actual, {} files in nominal",
Expand Down Expand Up @@ -271,11 +277,34 @@ mod tests {
let rule = Rule {
name: "test rule".to_string(),
file_type: ComparisonMode::Image(ImageCompareConfig { threshold: 1.0 }),
pattern_include: "*.".to_string(),
pattern_include: vec!["*.".to_string()],
pattern_exclude: None,
};
let mut result = Vec::new();
assert!(!process_rule("NOT_EXISTING", ".", &rule, &mut result).unwrap());
assert!(!process_rule(".", "NOT_EXISTING", &rule, &mut result).unwrap());
}

/// Exercises `get_files` with two include patterns against a blank exclude,
/// one matching exclude and two matching excludes.
#[test]
fn multiple_include_exclude_works() {
    let includes = vec![
        "**/Components.csv".to_string(),
        "**/CumulatedHistogram.csv".to_string(),
    ];

    // A single blank pattern is passed as the exclude list: both included files are expected.
    let blank_exclude = vec![""];
    let found =
        get_files("tests/csv/data/", &includes, &blank_exclude).expect("could not glob");
    assert_eq!(found.len(), 2);

    // Excluding one of the two included files leaves exactly one.
    let single_exclude = vec!["**/Components.csv".to_string()];
    let found =
        get_files("tests/csv/data/", &includes, &single_exclude).expect("could not glob");
    assert_eq!(found.len(), 1);

    // Excluding both included files leaves nothing.
    let all_excluded = vec![
        "**/Components.csv".to_string(),
        "**/CumulatedHistogram.csv".to_string(),
    ];
    let found =
        get_files("tests/csv/data/", &includes, &all_excluded).expect("could not glob");
    assert!(found.is_empty());
}
}
2 changes: 1 addition & 1 deletion src/report/mod.rs
Expand Up @@ -390,7 +390,7 @@ pub(crate) fn create(
let target = &sub_folder.join(detail);
info!("moving subfolder {:?} to {:?}", &detail, &target);

let files = crate::glob_files(detail, Some("*"))?;
let files = crate::glob_files(detail, &["*"])?;
for file in files.iter() {
if let Some(file_name) = file.file_name() {
if !target.exists() || !target.is_dir() {
Expand Down
15 changes: 10 additions & 5 deletions tests/integ/config.yml
@@ -1,7 +1,9 @@
rules:
- name: "VGRF-Reporting CSV comparing"
pattern_include: "**/*.csv"
pattern_exclude: "**/*_diff.csv"
pattern_include:
- "**/*.csv"
pattern_exclude:
- "**/*_diff.csv"
CSV:
comparison_modes:
- Absolute: 1.0
Expand All @@ -11,8 +13,10 @@ rules:
exclude_field_regex: "Excluded"

- name: "HTML-Compare strict"
pattern_exclude: "**/*_changed.html"
pattern_include: "**/*.html"
pattern_exclude:
- "**/*_changed.html"
pattern_include:
- "**/*.html"
PlainText:
threshold: 1.0
ignore_lines:
Expand All @@ -21,7 +25,8 @@ rules:
- "[A-Z]*[0-9]"

- name: "HTML-Compare fuzzy"
pattern_include: "**/*.html"
pattern_include:
- "**/*.html"
PlainText:
threshold: 0.9

Expand Down
3 changes: 2 additions & 1 deletion tests/integ/jpg_compare.yml
@@ -1,5 +1,6 @@
rules:
- name: "JPG comparison"
pattern_include: "**/*.jpg"
pattern_include:
- "**/*.jpg"
Image:
threshold: 0.9
12 changes: 8 additions & 4 deletions tests/integ/vgrf.yml
@@ -1,16 +1,20 @@
rules:
- name: "VGRF-Reporting CSV comparing"
pattern_include: "**/*.csv"
pattern_exclude: "**/vg_report.csv"
pattern_include:
- "**/*.csv"
pattern_exclude:
- "**/vg_report.csv"
CSV:
comparison_modes:
- Absolute: 1.0
- Relative: 0.1
exclude_field_regex: "Excluded"

- name: "HTML-Compare strict"
pattern_exclude: "**/*_changed.html"
pattern_include: "**/*.html"
pattern_exclude:
- "**/*_changed.html"
pattern_include:
- "**/*.html"
PlainText:
threshold: 1.0
ignore_lines:
Expand Down

0 comments on commit e9258d2

Please sign in to comment.