Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sha256 check, fix BOM on windows #6

Merged
merged 4 commits into from
Oct 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ description = "A flexible folder comparison tool / crate with html reporting."
repository = "https://github.com/VolumeGraphics/havocompare"
homepage = "https://github.com/VolumeGraphics/havocompare"
documentation = "https://docs.rs/havocompare"
version = "0.1.1"
version = "0.1.2"
edition = "2021"
license = "MIT"
authors = ["Volume Graphics GmbH"]
Expand All @@ -30,6 +30,8 @@ itertools = "0.10"
tera = "1.17"
md5 = "0.7.0"
tempdir = "0.3"
sha2 = "0.10.6"
data-encoding = "2.3.2"

[target.'cfg(windows)'.dependencies]
ansi_term = "0.12"
Expand Down
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,24 @@ crate is used. You can ignore single lines which you know are different by speci
```


#### Hash comparison
For binary files which cannot otherwise be checked, we can also do a simple hash comparison.
Currently, only SHA-256 is supported, but more hash functions can be added easily.

```yaml
- name: "Hash comparison strict"
pattern_exclude: "**/*.bin"
Hash:
function: Sha256
```


## Changelog

### 0.1.2:
- Add SHA-256 comparison mode
- Fix BOM handling on Windows for CSV comparison

### 0.1.1:
- Better error message on folder not found
- Better test coverage
Expand Down
34 changes: 32 additions & 2 deletions config_scheme.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Config",
"title": "ConfigurationFile",
"type": "object",
"required": [
"rules"
Expand Down Expand Up @@ -71,6 +71,23 @@
}
}
},
"HashConfig": {
"type": "object",
"required": [
"function"
],
"properties": {
"function": {
"$ref": "#/definitions/HashFunction"
}
}
},
"HashFunction": {
"type": "string",
"enum": [
"Sha256"
]
},
"ImageCompareConfig": {
"type": "object",
"required": [
Expand Down Expand Up @@ -149,7 +166,7 @@
"additionalProperties": false
},
{
"description": "stupid text compare",
"description": "plain text compare",
"type": "object",
"required": [
"PlainText"
Expand All @@ -160,6 +177,19 @@
}
},
"additionalProperties": false
},
{
"description": "Compare using file hashes",
"type": "object",
"required": [
"Hash"
],
"properties": {
"Hash": {
"$ref": "#/definitions/HashConfig"
}
},
"additionalProperties": false
}
],
"required": [
Expand Down
28 changes: 24 additions & 4 deletions src/csv/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ pub struct Field {
pub value: Value,
}

fn split_row(row: String, config: &Delimiters, row_num: usize) -> Vec<Field> {
fn split_row(row: &str, config: &Delimiters, row_num: usize) -> Vec<Field> {
if let Some(row_delimiter) = config.field_delimiter.as_ref() {
row.split(*row_delimiter)
.enumerate()
Expand All @@ -266,7 +266,7 @@ fn split_row(row: String, config: &Delimiters, row_num: usize) -> Vec<Field> {
row: row_num,
col: 0,
},
value: Value::from_str(row.as_str(), &config.decimal_separator),
value: Value::from_str(row, &config.decimal_separator),
};
vec![field]
}
Expand All @@ -283,7 +283,13 @@ pub fn split_to_fields<R: Read + Seek>(mut input: R, config: &Delimiters) -> Vec
.lines()
.filter_map(|l| l.ok())
.enumerate()
.flat_map(|(row_num, row_value)| split_row(row_value, &delimiters, row_num))
.flat_map(|(row_num, row_value)| {
split_row(
row_value.trim_start_matches('\u{feff}'),
&delimiters,
row_num,
)
})
.collect()
}

Expand Down Expand Up @@ -480,6 +486,7 @@ fn guess_format_from_reader<R: Read + Seek>(mut input: &mut R) -> Delimiters {
mod tests {
use super::*;
use crate::csv::DiffType::{DifferentValueTypes, OutOfTolerance, UnequalStrings};
use std::io::Cursor;

const NOMINAL: &str = "nominal";
const ACTUAL: &str = "actual";
Expand Down Expand Up @@ -879,11 +886,24 @@ mod tests {
field_delimiter: None,
decimal_separator: None,
};
let split_result = split_row(row.to_string(), &delimiters, POS_ROW);
let split_result = split_row(row, &delimiters, POS_ROW);
assert_eq!(split_result.len(), 1);
let field = split_result.first().unwrap();
assert_eq!(field.value.get_string().as_deref().unwrap(), row);
assert_eq!(field.position.row, POS_ROW);
assert_eq!(field.position.col, 0);
}

// A leading byte order mark (U+FEFF) in one input must not produce a diff
// against otherwise identical content that lacks it.
#[test]
fn bom_is_trimmed() {
// NOTE(review): the line ending here is "\n\r", not the Windows sequence "\r\n" — confirm this is intentional.
let str_with_bom = "\u{feff}Hallo\n\r";
let str_no_bom = "Hallo\n";
// Default delimiters, no field exclusion, and an absolute tolerance of 0.0.
let cfg = CSVCompareConfig {
delimiters: Delimiters::default(),
exclude_field_regex: None,
comparison_modes: vec![Mode::Absolute(0.0)],
};
let res = get_diffs_readers(Cursor::new(str_with_bom), Cursor::new(str_no_bom), &cfg);
// No reported differences means the BOM did not leak into the compared field.
assert!(res.is_empty());
}
}
104 changes: 104 additions & 0 deletions src/hash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
use crate::{report, Deserialize, Serialize};
use data_encoding::HEXLOWER;

use schemars_derive::JsonSchema;
use std::fs::File;
use std::io::Read;
use std::path::Path;

// Hash algorithms that can be selected for a hash comparison.
// Only SHA-256 exists so far.
// NOTE(review): plain `//` comments are used on purpose — doc comments on JsonSchema types
// appear to end up as `description` entries in the generated schema; confirm before
// converting these to `///`.
#[derive(Debug, Deserialize, Serialize, JsonSchema)]
pub enum HashFunction {
// SHA-256; `hash_file` returns its digest as 32 raw bytes.
Sha256,
}

impl HashFunction {
    /// Streams everything readable from `file` through the selected hash
    /// function and returns the raw 32 byte digest.
    ///
    /// # Panics
    /// Panics if reading from `file` fails.
    fn hash_file(&self, mut file: impl Read) -> [u8; 32] {
        match self {
            Self::Sha256 => {
                use sha2::{Digest, Sha256};
                use std::io;

                let mut hasher = Sha256::new();

                // The reader is already open when we get here, so a failure at this
                // point is a read error, not an open error.
                let _ = io::copy(&mut file, &mut hasher).expect("Could not read file to hash");
                hasher.finalize().into()
            }
        }
    }
}

// Settings for a hash based file comparison.
// NOTE(review): plain `//` comments are used on purpose — doc comments on JsonSchema types
// appear to end up as `description` entries in the generated schema; confirm before
// converting these to `///`.
#[derive(Debug, Deserialize, Serialize, JsonSchema)]
pub struct HashConfig {
// Hash function applied to both files before their digests are compared.
function: HashFunction,
}

impl Default for HashConfig {
    /// Builds a configuration that hashes with SHA-256.
    fn default() -> Self {
        let function = HashFunction::Sha256;
        Self { function }
    }
}

/// Compares two files by hashing both with the function selected in `config`.
///
/// If the digests differ, a single diff line containing both digests in
/// lower-case hex is passed to `report::write_html_detail`; if they match,
/// an empty diff list is passed. The result of that call is returned.
///
/// # Panics
/// Panics if either file cannot be opened or read.
// NOTE(review): this function takes the actual file first and the nominal file
// second, while the visible call site in lib.rs passes `nominal, actual` like it
// does for the sibling comparisons — verify and align the call site.
pub fn compare_files<P: AsRef<Path>>(
    actual_path: P,
    nominal_path: P,
    config: &HashConfig,
    rule_name: &str,
) -> report::FileCompareResult {
    let act = config.function.hash_file(
        File::open(actual_path.as_ref()).expect("Could not open actual file for hashing"),
    );
    let nom = config.function.hash_file(
        File::open(nominal_path.as_ref()).expect("Could not open nominal file for hashing"),
    );

    let diff = if act != nom {
        // Argument order must follow the labels in the message: nominal first, actual second.
        vec![format!(
            "Nominal file's hash is '{}' actual is '{}'",
            HEXLOWER.encode(&nom),
            HEXLOWER.encode(&act)
        )]
    } else {
        vec![]
    };

    report::write_html_detail(nominal_path, actual_path, diff.as_slice(), rule_name)
}

#[cfg(test)]
mod test {
    use super::*;
    use crate::hash::HashFunction::Sha256;

    /// Hashing the same file twice yields the same digest.
    #[test]
    fn identity() {
        let first = File::open("tests/integ.rs").unwrap();
        let second = File::open("tests/integ.rs").unwrap();
        assert_eq!(Sha256.hash_file(first), Sha256.hash_file(second));
    }

    /// The digest of a fixture file is pinned to a known hex string.
    #[test]
    fn hash_pinning() {
        let input = File::open("tests/integ/data/images/diff_100_DPI.png").unwrap();
        let digest = Sha256.hash_file(input);
        let sum = "bc3abb411d305c4436185c474be3db2608e910612a573f6791b143d7d749b699";
        assert_eq!(HEXLOWER.encode(&digest), sum);
    }

    /// Comparing a file with itself through the public entry point is not an error.
    #[test]
    fn identity_outer() {
        let path = "tests/integ.rs";
        let config = HashConfig::default();
        let outcome = compare_files(path, path, &config, "test");
        assert!(!outcome.is_error);
    }

    /// Comparing two different files through the public entry point is an error.
    #[test]
    fn different_files_throw_outer() {
        let config = HashConfig::default();
        let outcome = compare_files(
            "tests/integ/data/images/actual/SaveImage_100DPI_default_size.jpg",
            "tests/integ/data/images/expected/SaveImage_100DPI_default_size.jpg",
            &config,
            "test",
        );
        assert!(outcome.is_error);
    }
}
7 changes: 7 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
#![deny(deprecated)]

mod csv;
mod hash;
mod html;
mod image;
mod report;

use crate::hash::HashConfig;
use crate::html::HTMLCompareConfig;
use crate::report::FileCompareResult;
use schemars::schema_for;
Expand All @@ -30,6 +32,8 @@ enum ComparisonMode {
Image(image::ImageCompareConfig),
/// plain text compare
PlainText(HTMLCompareConfig),
/// Compare using file hashes
Hash(HashConfig),
}

#[derive(Debug, Deserialize, Serialize, JsonSchema)]
Expand Down Expand Up @@ -92,6 +96,9 @@ fn process_file(
ComparisonMode::PlainText(conf) => {
html::compare_files(nominal.as_ref(), actual.as_ref(), conf, &rule.name)
}
ComparisonMode::Hash(conf) => {
hash::compare_files(nominal.as_ref(), actual.as_ref(), conf, &rule.name)
}
};

if compare_result.is_error {
Expand Down