Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/replay.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
parse_mode: [unidiff]
parse_mode: [unidiff, gitdiff]
name: ${{ inputs.name && matrix.parse_mode || format('{0} ({1}, {2})', inputs.repo_url, matrix.parse_mode, inputs.commits) }}
steps:
- uses: actions/checkout@v6
Expand Down
119 changes: 70 additions & 49 deletions src/patch/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,42 @@ use std::borrow::Cow;

type Result<T, E = ParsePatchError> = std::result::Result<T, E>;

/// Options that control parsing behavior.
///
/// Defaults match the [`parse`]/[`parse_bytes`] behavior.
#[derive(Clone, Copy)]
pub(crate) struct ParseOpts {
skip_preamble: bool,
reject_orphaned_hunks: bool,
}

impl Default for ParseOpts {
fn default() -> Self {
Self {
skip_preamble: true,
reject_orphaned_hunks: false,
}
}
}

impl ParseOpts {
/// Don't skip preamble lines before `---`/`+++`/`@@`.
///
/// Useful when the caller has already positioned the input
/// at the start of the patch content.
pub(crate) fn no_skip_preamble(mut self) -> Self {
self.skip_preamble = false;
self
}

/// Reject orphaned `@@ ` hunk headers after parsed hunks,
/// matching `git apply` behavior.
pub(crate) fn reject_orphaned_hunks(mut self) -> Self {
self.reject_orphaned_hunks = true;
self
}
}

struct Parser<'a, T: Text + ?Sized> {
lines: std::iter::Peekable<LineIter<'a, T>>,
offset: usize,
Expand Down Expand Up @@ -54,77 +90,65 @@ impl<'a, T: Text + ?Sized> Parser<'a, T> {
}

pub fn parse(input: &str) -> Result<Patch<'_, str>> {
let (result, _consumed) = parse_one(input);
let (result, _consumed) = parse_one(input, ParseOpts::default());
result
}

pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks());
result
}

pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
let (result, _consumed) = parse_one(input, ParseOpts::default());
result
}

pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
let (result, _consumed) = parse_one(input, ParseOpts::default().reject_orphaned_hunks());
result
}

/// Parses one patch from input.
///
/// Always returns consumed bytes alongside the result
/// so callers can advance past the parsed or partially parsed content.
pub(crate) fn parse_one(input: &str) -> (Result<Patch<'_, str>>, usize) {
pub(crate) fn parse_one<'a, T: Text + ?Sized>(
input: &'a T,
opts: ParseOpts,
) -> (Result<Patch<'a, T>>, usize) {
let mut parser = Parser::new(input);

let header = match patch_header(&mut parser) {
let header = match patch_header(&mut parser, &opts) {
Ok(h) => h,
Err(e) => return (Err(e), parser.offset()),
};
let hunks = match hunks(&mut parser) {
Ok(h) => h,
Err(e) => return (Err(e), parser.offset()),
};
if opts.reject_orphaned_hunks {
if let Err(e) = reject_orphaned_hunk_headers(&mut parser) {
return (Err(e), parser.offset());
}
}

let patch = Patch::new(
header.0.map(convert_cow_to_str),
header.1.map(convert_cow_to_str),
header.0.map(T::from_bytes_cow),
header.1.map(T::from_bytes_cow),
hunks,
);
(Ok(patch), parser.offset())
}

pub fn parse_strict(input: &str) -> Result<Patch<'_, str>> {
let mut parser = Parser::new(input);
let header = patch_header(&mut parser)?;
let hunks = hunks(&mut parser)?;
reject_orphaned_hunk_headers(&mut parser)?;

Ok(Patch::new(
header.0.map(convert_cow_to_str),
header.1.map(convert_cow_to_str),
hunks,
))
}

pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> {
let mut parser = Parser::new(input);
let header = patch_header(&mut parser)?;
let hunks = hunks(&mut parser)?;

Ok(Patch::new(header.0, header.1, hunks))
}

pub fn parse_bytes_strict(input: &[u8]) -> Result<Patch<'_, [u8]>> {
let mut parser = Parser::new(input);
let header = patch_header(&mut parser)?;
let hunks = hunks(&mut parser)?;
reject_orphaned_hunk_headers(&mut parser)?;

Ok(Patch::new(header.0, header.1, hunks))
}

// This is only used when the type originated as a utf8 string
fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> {
match cow {
Cow::Borrowed(b) => std::str::from_utf8(b).unwrap().into(),
Cow::Owned(o) => String::from_utf8(o).unwrap().into(),
}
}

#[allow(clippy::type_complexity)]
fn patch_header<'a, T: Text + ToOwned + ?Sized>(
fn patch_header<'a, T: Text + ?Sized>(
parser: &mut Parser<'a, T>,
opts: &ParseOpts,
) -> Result<(Option<Cow<'a, [u8]>>, Option<Cow<'a, [u8]>>)> {
skip_header_preamble(parser)?;
if opts.skip_preamble {
skip_header_preamble(parser)?;
}

let mut filename1 = None;
let mut filename2 = None;
Expand Down Expand Up @@ -161,10 +185,7 @@ fn skip_header_preamble<T: Text + ?Sized>(parser: &mut Parser<'_, T>) -> Result<
Ok(())
}

fn parse_filename<'a, T: Text + ToOwned + ?Sized>(
prefix: &str,
line: &'a T,
) -> Result<Cow<'a, [u8]>> {
fn parse_filename<'a, T: Text + ?Sized>(prefix: &str, line: &'a T) -> Result<Cow<'a, [u8]>> {
let line = line
.strip_prefix(prefix)
.ok_or(ParsePatchErrorKind::InvalidFilename)?;
Expand Down
14 changes: 14 additions & 0 deletions src/patch_set/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,15 @@ pub(crate) enum PatchSetParseErrorKind {

/// Create patch missing modified path.
CreateMissingModifiedPath,

/// Invalid file mode string.
InvalidFileMode(String),

/// Invalid `diff --git` path.
InvalidDiffGitPath,

/// Binary diff not supported in current configuration.
BinaryNotSupported { path: String },
}

impl fmt::Display for PatchSetParseErrorKind {
Expand All @@ -81,6 +90,11 @@ impl fmt::Display for PatchSetParseErrorKind {
Self::BothDevNull => write!(f, "patch has both original and modified as /dev/null"),
Self::DeleteMissingOriginalPath => write!(f, "delete patch has no original path"),
Self::CreateMissingModifiedPath => write!(f, "create patch has no modified path"),
Self::InvalidFileMode(mode) => write!(f, "invalid file mode: {mode}"),
Self::InvalidDiffGitPath => write!(f, "invalid diff --git path"),
Self::BinaryNotSupported { path } => {
write!(f, "binary diff not supported: {path}")
}
}
}
}
Expand Down
76 changes: 74 additions & 2 deletions src/patch_set/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,25 @@ use std::borrow::Cow;
use crate::Patch;

pub use error::PatchSetParseError;
use error::PatchSetParseErrorKind;
pub use parse::PatchSet;

/// Options for parsing patch content.
///
/// Use [`ParseOptions::unidiff()`] to create options for the desired format.
/// Use [`ParseOptions::unidiff()`] or [`ParseOptions::gitdiff()`]
/// to create options for the desired format.
///
/// ## Binary Files
///
/// When parsing git diffs, binary file changes are detected by:
///
/// * `Binary files a/path and b/path differ` (`git diff` without `--binary` flag)
/// * `GIT binary patch` (from `git diff --binary`)
///
/// Note that this is not a documented Git behavior,
/// so the implementation here is subject to change if Git changes.
///
/// By default, binary diffs are skipped.
///
/// ## Example
///
Expand All @@ -40,12 +54,26 @@ pub use parse::PatchSet;
#[derive(Debug, Clone)]
pub struct ParseOptions {
pub(crate) format: Format,
pub(crate) binary: Binary,
}

#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, Default)]
pub(crate) enum Format {
/// Standard unified diff format.
#[default]
UniDiff,
/// Git extended diff format.
GitDiff,
}

/// How to handle binary diffs in GitDiff mode.
#[derive(Debug, Clone, Copy, Default)]
pub(crate) enum Binary {
/// Skip binary diffs silently.
#[default]
Skip,
/// Return error if binary diff encountered.
Fail,
}

impl ParseOptions {
Expand All @@ -64,8 +92,38 @@ impl ParseOptions {
pub fn unidiff() -> Self {
Self {
format: Format::UniDiff,
binary: Default::default(),
}
}

/// Parse as [git extended diff format][git-diff-format].
///
/// Supports all features of [`unidiff()`](Self::unidiff) plus:
///
/// * `diff --git` headers
/// * Extended headers (`new file mode`, `deleted file mode`, etc.)
/// * Rename/copy detection (`rename from`/`rename to`, `copy from`/`copy to`)
/// * Binary file detection (skipped by default)
///
/// [git-diff-format]: https://git-scm.com/docs/diff-format
pub fn gitdiff() -> Self {
Self {
format: Format::GitDiff,
binary: Default::default(),
}
}

/// Skip binary diffs silently.
pub fn skip_binary(mut self) -> Self {
self.binary = Binary::Skip;
self
}
Comment on lines +116 to +120
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not good actually. We may want a binary marker like patch so people explicitly know they are skipping something


/// Return an error if a binary diff is encountered.
pub fn fail_on_binary(mut self) -> Self {
self.binary = Binary::Fail;
self
}
}

/// File mode extracted from git extended headers.
Expand All @@ -81,6 +139,20 @@ pub enum FileMode {
Gitlink,
}

impl std::str::FromStr for FileMode {
type Err = PatchSetParseError;

fn from_str(mode: &str) -> Result<Self, Self::Err> {
match mode {
"100644" => Ok(Self::Regular),
"100755" => Ok(Self::Executable),
"120000" => Ok(Self::Symlink),
"160000" => Ok(Self::Gitlink),
_ => Err(PatchSetParseErrorKind::InvalidFileMode(mode.to_owned()).into()),
}
}
}

/// The kind of patch content in a [`FilePatch`].
#[derive(Clone, PartialEq, Eq)]
pub enum PatchKind<'a, T: ToOwned + ?Sized> {
Expand Down
Loading