-
Notifications
You must be signed in to change notification settings - Fork 37
feat(patch_set): support git diff application #59
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
e5e4e84
1a83fe9
d1ec236
fe9b1b3
f4e6b2b
edf93e5
90861e0
95836ec
ff392af
e5895d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,42 @@ use std::borrow::Cow; | |
|
|
||
| type Result<T, E = ParsePatchError> = std::result::Result<T, E>; | ||
|
|
||
| /// Options that control parsing behavior. | ||
| /// | ||
| /// Defaults match the [`parse`]/[`parse_bytes`] behavior. | ||
| #[derive(Clone, Copy)] | ||
| pub(crate) struct ParseOpts { | ||
| skip_preamble: bool, | ||
| reject_orphaned_hunks: bool, | ||
| } | ||
|
|
||
| impl Default for ParseOpts { | ||
| fn default() -> Self { | ||
| Self { | ||
| skip_preamble: true, | ||
| reject_orphaned_hunks: false, | ||
| } | ||
| } | ||
| } | ||
|
|
||
| impl ParseOpts { | ||
| /// Don't skip preamble lines before `---`/`+++`/`@@`. | ||
| /// | ||
| /// Useful when the caller has already positioned the input | ||
| /// at the start of the patch content. | ||
| pub(crate) fn no_skip_preamble(mut self) -> Self { | ||
| self.skip_preamble = false; | ||
| self | ||
| } | ||
|
|
||
| /// Reject orphaned `@@ ` hunk headers after parsed hunks, | ||
| /// matching `git apply` behavior. | ||
| pub(crate) fn reject_orphaned_hunks(mut self) -> Self { | ||
| self.reject_orphaned_hunks = true; | ||
| self | ||
| } | ||
| } | ||
|
|
||
| struct Parser<'a, T: Text + ?Sized> { | ||
| lines: std::iter::Peekable<LineIter<'a, T>>, | ||
| offset: usize, | ||
|
|
@@ -54,77 +90,65 @@ impl<'a, T: Text + ?Sized> Parser<'a, T> { | |
| } | ||
|
|
||
| pub fn parse(input: &str) -> Result<Patch<'_, str>> { | ||
| let (result, _consumed) = parse_one(input); | ||
| let (result, _consumed) = parse_one(input, ParseOpts::default()); | ||
| result | ||
| } | ||
|
|
||
| pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> { | ||
| let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks()); | ||
| result | ||
| } | ||
|
|
||
| pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> { | ||
| let (result, _consumed) = parse_one(input, ParseOpts::default()); | ||
| result | ||
| } | ||
|
|
||
| pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> { | ||
| let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks()); | ||
| result | ||
| } | ||
|
|
||
| /// Parses one patch from input. | ||
| /// | ||
| /// Always returns consumed bytes alongside the result | ||
| /// so callers can advance past the parsed or partially parsed content. | ||
| pub(crate) fn parse_one(input: &str) -> (Result<Patch<'_, str>>, usize) { | ||
| pub(crate) fn parse_one<'a, T: Text + ?Sized>( | ||
| input: &'a T, | ||
| opts: ParseOpts, | ||
| ) -> (Result<Patch<'a, T>>, usize) { | ||
| let mut parser = Parser::new(input); | ||
|
|
||
| let header = match patch_header(&mut parser) { | ||
| let header = match patch_header(&mut parser, &opts) { | ||
| Ok(h) => h, | ||
| Err(e) => return (Err(e), parser.offset()), | ||
| }; | ||
| let hunks = match hunks(&mut parser) { | ||
| Ok(h) => h, | ||
| Err(e) => return (Err(e), parser.offset()), | ||
| }; | ||
| if opts.reject_orphaned_hunks { | ||
| if let Err(e) = reject_orphaned_hunk_headers(&mut parser) { | ||
| return (Err(e), parser.offset()); | ||
| } | ||
| } | ||
|
|
||
| let patch = Patch::new( | ||
| header.0.map(convert_cow_to_str), | ||
| header.1.map(convert_cow_to_str), | ||
| header.0.map(T::from_bytes_cow), | ||
| header.1.map(T::from_bytes_cow), | ||
|
Comment on lines
+137
to
+138
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of this we should probably rework patch_header to return a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looking closer, and I'm a bit unsure now. I think there is a fundamental API issue that original filename, modified
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Though it is actually a pre-existing issue in the original
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Posted #65 |
||
| hunks, | ||
| ); | ||
| (Ok(patch), parser.offset()) | ||
| } | ||
|
|
||
| pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> { | ||
| let mut parser = Parser::new(input); | ||
| let header = patch_header(&mut parser)?; | ||
| let hunks = hunks(&mut parser)?; | ||
| reject_orphaned_hunk_headers(&mut parser)?; | ||
|
|
||
| Ok(Patch::new( | ||
| header.0.map(convert_cow_to_str), | ||
| header.1.map(convert_cow_to_str), | ||
| hunks, | ||
| )) | ||
| } | ||
|
|
||
| pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> { | ||
| let mut parser = Parser::new(input); | ||
| let header = patch_header(&mut parser)?; | ||
| let hunks = hunks(&mut parser)?; | ||
|
|
||
| Ok(Patch::new(header.0, header.1, hunks)) | ||
| } | ||
|
|
||
| pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> { | ||
| let mut parser = Parser::new(input); | ||
| let header = patch_header(&mut parser)?; | ||
| let hunks = hunks(&mut parser)?; | ||
| reject_orphaned_hunk_headers(&mut parser)?; | ||
|
|
||
| Ok(Patch::new(header.0, header.1, hunks)) | ||
| } | ||
|
|
||
| // This is only used when the type originated as a utf8 string | ||
| fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> { | ||
| match cow { | ||
| Cow::Borrowed(b) => std::str::from_utf8(b).unwrap().into(), | ||
| Cow::Owned(o) => String::from_utf8(o).unwrap().into(), | ||
| } | ||
| } | ||
|
|
||
| #[allow(clippy::type_complexity)] | ||
| fn patch_header<'a, T: Text + ToOwned + ?Sized>( | ||
| fn patch_header<'a, T: Text + ?Sized>( | ||
| parser: &mut Parser<'a, T>, | ||
| opts: &ParseOpts, | ||
| ) -> Result<(Option<Cow<'a, [u8]>>, Option<Cow<'a, [u8]>>)> { | ||
| skip_header_preamble(parser)?; | ||
| if opts.skip_preamble { | ||
| skip_header_preamble(parser)?; | ||
| } | ||
|
|
||
| let mut filename1 = None; | ||
| let mut filename2 = None; | ||
|
|
@@ -161,10 +185,7 @@ fn skip_header_preamble<T: Text + ?Sized>(parser: &mut Parser<'_, T>) -> Result< | |
| Ok(()) | ||
| } | ||
|
|
||
| fn parse_filename<'a, T: Text + ToOwned + ?Sized>( | ||
| prefix: &str, | ||
| line: &'a T, | ||
| ) -> Result<Cow<'a, [u8]>> { | ||
| fn parse_filename<'a, T: Text + ?Sized>(prefix: &str, line: &'a T) -> Result<Cow<'a, [u8]>> { | ||
| let line = line | ||
| .strip_prefix(prefix) | ||
| .ok_or(ParsePatchErrorKind::InvalidFilename)?; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,11 +13,25 @@ use std::borrow::Cow; | |
| use crate::Patch; | ||
|
|
||
| pub use error::PatchSetParseError; | ||
| use error::PatchSetParseErrorKind; | ||
| pub use parse::PatchSet; | ||
|
|
||
| /// Options for parsing patch content. | ||
| /// | ||
| /// Use [`ParseOptions::unidiff()`] to create options for the desired format. | ||
| /// Use [`ParseOptions::unidiff()`] or [`ParseOptions::gitdiff()`] | ||
| /// to create options for the desired format. | ||
| /// | ||
| /// ## Binary Files | ||
| /// | ||
| /// When parsing git diffs, binary file changes are detected by: | ||
| /// | ||
| /// * `Binary files a/path and b/path differ` (`git diff` without `--binary` flag) | ||
| /// * `GIT binary patch` (from `git diff --binary`) | ||
| /// | ||
| /// Note that this is not a documented Git behavior, | ||
| /// so the implementation here is subject to change if Git changes. | ||
| /// | ||
| /// By default, binary diffs are skipped. | ||
| /// | ||
| /// ## Example | ||
| /// | ||
|
|
@@ -40,12 +54,26 @@ pub use parse::PatchSet; | |
| #[derive(Debug, Clone)] | ||
| pub struct ParseOptions { | ||
| pub(crate) format: Format, | ||
| pub(crate) binary: Binary, | ||
| } | ||
|
|
||
| #[derive(Debug, Clone, Copy)] | ||
| #[derive(Debug, Clone, Copy, Default)] | ||
| pub(crate) enum Format { | ||
| /// Standard unified diff format. | ||
| #[default] | ||
| UniDiff, | ||
| /// Git extended diff format. | ||
| GitDiff, | ||
| } | ||
|
|
||
| /// How to handle binary diffs in GitDiff mode. | ||
| #[derive(Debug, Clone, Copy, Default)] | ||
| pub(crate) enum Binary { | ||
| /// Skip binary diffs silently. | ||
| #[default] | ||
| Skip, | ||
| /// Return error if binary diff encountered. | ||
| Fail, | ||
| } | ||
|
|
||
| impl ParseOptions { | ||
|
|
@@ -64,8 +92,38 @@ impl ParseOptions { | |
| pub fn unidiff() -> Self { | ||
| Self { | ||
| format: Format::UniDiff, | ||
| binary: Default::default(), | ||
| } | ||
| } | ||
|
|
||
| /// Parse as [git extended diff format][git-diff-format]. | ||
| /// | ||
| /// Supports all features of [`unidiff()`](Self::unidiff) plus: | ||
| /// | ||
| /// * `diff --git` headers | ||
| /// * Extended headers (`new file mode`, `deleted file mode`, etc.) | ||
| /// * Rename/copy detection (`rename from`/`rename to`, `copy from`/`copy to`) | ||
| /// * Binary file detection (skipped by default) | ||
| /// | ||
| /// [git-diff-format]: https://git-scm.com/docs/diff-format | ||
| pub fn gitdiff() -> Self { | ||
| Self { | ||
| format: Format::GitDiff, | ||
| binary: Default::default(), | ||
| } | ||
| } | ||
|
|
||
| /// Skip binary diffs silently. | ||
| pub fn skip_binary(mut self) -> Self { | ||
| self.binary = Binary::Skip; | ||
| self | ||
| } | ||
|
Comment on lines
+116
to
+120
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not good actually. We may want a binary marker like patch so people explicitly know they are skipping something |
||
|
|
||
| /// Return an error if a binary diff is encountered. | ||
| pub fn fail_on_binary(mut self) -> Self { | ||
| self.binary = Binary::Fail; | ||
| self | ||
| } | ||
| } | ||
|
|
||
| /// File mode extracted from git extended headers. | ||
|
|
@@ -81,6 +139,20 @@ pub enum FileMode { | |
| Gitlink, | ||
| } | ||
|
|
||
| impl std::str::FromStr for FileMode { | ||
| type Err = PatchSetParseError; | ||
|
|
||
| fn from_str(mode: &str) -> Result<Self, Self::Err> { | ||
| match mode { | ||
| "100644" => Ok(Self::Regular), | ||
| "100755" => Ok(Self::Executable), | ||
| "120000" => Ok(Self::Symlink), | ||
| "160000" => Ok(Self::Gitlink), | ||
| _ => Err(PatchSetParseErrorKind::InvalidFileMode(mode.to_owned()).into()), | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// The kind of patch content in a [`FilePatch`]. | ||
| #[derive(Clone, PartialEq, Eq)] | ||
| pub enum PatchKind<'a, T: ToOwned + ?Sized> { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need to fix this here but maybe worth a follow up once we're done (and before a release) to try and dedup the api surface area. Maybe just include a TODO comment here for the time being?