From 53b06e63ef055e89f4bcb9362b17ba4c3cf8f192 Mon Sep 17 00:00:00 2001 From: HouXiaoxuan Date: Fri, 29 Nov 2024 22:16:36 +0800 Subject: [PATCH 1/2] fix(libra): diff show add & delete blobs. Signed-off-by: HouXiaoxuan --- libra/src/command/diff.rs | 81 +++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 29 deletions(-) diff --git a/libra/src/command/diff.rs b/libra/src/command/diff.rs index 20476b196..ece2b5a12 100644 --- a/libra/src/command/diff.rs +++ b/libra/src/command/diff.rs @@ -1,5 +1,5 @@ use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fmt, io::{self, Write}, path::PathBuf, @@ -114,6 +114,7 @@ pub async fn execute(args: DiffArgs) { index.tracked_files() } else { // use working directory as new commit + // NOTE: git doesn't show diffs for untracked files, but we do util::list_workdir_files().unwrap() }; get_files_blobs(&files) @@ -159,6 +160,16 @@ pub async fn diff( w: &mut dyn io::Write, ) { let old_blobs: HashMap<PathBuf, SHA1> = old_blobs.into_iter().collect(); + let new_blobs: HashMap<PathBuf, SHA1> = new_blobs.into_iter().collect(); + // union set + let union_files: HashSet<PathBuf> = old_blobs.keys().chain(new_blobs.keys()).cloned().collect(); + tracing::debug!( + "old blobs {:?}, new blobs {:?}, union files {:?}", + old_blobs.len(), + new_blobs.len(), + union_files.len() + ); + let read_content = |file: &PathBuf, hash: &SHA1| { // read content from blob or file match load_object::(hash) { @@ -173,40 +184,52 @@ pub async fn diff( } } }; + // filter files, cross old and new files, and pathspec - for (new_file, new_hash) in new_blobs { + for file in union_files { // if new_file did't start with any path in filter, skip it - if !filter.is_empty() && !filter.iter().any(|path| new_file.sub_of(path)) { + if !filter.is_empty() && !filter.iter().any(|path| file.sub_of(path)) { continue; } - match old_blobs.get(&new_file) { - Some(old_hash) => { - if old_hash == &new_hash { - continue; - } - let old_content = read_content(&new_file, old_hash); - let 
new_content = read_content(&new_file, &new_hash); - writeln!( - w, - "diff --git a/{} b/{}", - new_file.display(), - new_file.display() // files name is always the same, current did't support rename - ) - .unwrap(); - writeln!( - w, - "index {}..{}", - &old_hash.to_plain_str()[0..8], - &new_hash.to_plain_str()[0..8] - ) - .unwrap(); - diff_result(&old_content, &new_content, w); - } - None => { - continue; - } + let new_hash = new_blobs.get(&file); + let old_hash = old_blobs.get(&file); + if new_hash == old_hash { + continue; } + + let old_content = match &old_hash.as_ref() { + Some(hash) => read_content(&file, hash), + None => String::new(), + }; + let new_content = match &new_hash.as_ref() { + Some(hash) => read_content(&file, hash), + None => String::new(), + }; + + writeln!( + w, + "diff --git a/{} b/{}", + file.display(), + file.display() // the file name is always the same on both sides; renames are not supported yet + ) + .unwrap(); + + if old_hash.is_none() { + writeln!(w, "new file mode 100644").unwrap(); + } else if new_hash.is_none() { + writeln!(w, "deleted file mode 100644").unwrap(); + } + + let old_index = old_hash.map_or("0000000".to_string(), |h| { + h.to_plain_str()[0..8].to_string() + }); + let new_index = new_hash.map_or("0000000".to_string(), |h| { + h.to_plain_str()[0..8].to_string() + }); + writeln!(w, "index {}..{}", old_index, new_index).unwrap(); + + diff_result(&old_content, &new_content, w); } } From 8ba69a5829a2c6775140a825206e9cb4016b0a0f Mon Sep 17 00:00:00 2001 From: HouXiaoxuan Date: Fri, 29 Nov 2024 22:25:21 +0800 Subject: [PATCH 2/2] feat(libra): use `imara_diff` to replace `similar` crate. 
Signed-off-by: HouXiaoxuan --- libra/Cargo.toml | 1 + libra/src/command/diff.rs | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/libra/Cargo.toml b/libra/Cargo.toml index cdd1c5cd6..3b6346a9f 100644 --- a/libra/Cargo.toml +++ b/libra/Cargo.toml @@ -22,6 +22,7 @@ futures = { workspace = true } futures-util = { workspace = true } gemini = { workspace = true, optional = true } hex = { workspace = true } +imara-diff = "0.1.7" indicatif = "0.17.8" lazy_static = { workspace = true } lru-mem = "0.3.0" diff --git a/libra/src/command/diff.rs b/libra/src/command/diff.rs index ece2b5a12..67629a01d 100644 --- a/libra/src/command/diff.rs +++ b/libra/src/command/diff.rs @@ -6,6 +6,7 @@ use std::{ }; use clap::Parser; +use imara_diff::{intern::InternedInput, Algorithm, UnifiedDiffBuilder}; use mercury::{ hash::SHA1, internal::{ @@ -229,7 +230,8 @@ pub async fn diff( }); writeln!(w, "index {}..{}", old_index, new_index).unwrap(); - diff_result(&old_content, &new_content, w); + // diff_result(&old_content, &new_content, w); + imara_diff_result(&old_content, &new_content, w); } } @@ -262,7 +264,8 @@ impl fmt::Display for Line { } } -fn diff_result(old: &str, new: &str, w: &mut dyn io::Write) { +#[allow(dead_code)] +fn similar_diff_result(old: &str, new: &str, w: &mut dyn io::Write) { let diff = similar::TextDiff::from_lines(old, new); for (idx, group) in diff.grouped_ops(3).iter().enumerate() { if idx > 0 { @@ -292,6 +295,16 @@ fn diff_result(old: &str, new: &str, w: &mut dyn io::Write) { } } +fn imara_diff_result(old: &str, new: &str, w: &mut dyn io::Write) { + let input = InternedInput::new(old, new); + let diff = imara_diff::diff( + Algorithm::Histogram, + &input, + UnifiedDiffBuilder::new(&input), + ); + write!(w, "{}", diff).unwrap(); +} + #[cfg(test)] mod test { use super::*; @@ -333,11 +346,11 @@ mod test { } #[test] - fn test_diff_result() { + fn test_similar_diff_result() { let old = "Hello World\nThis is the second line.\nThis is 
the third."; let new = "Hallo Welt\nThis is the second line.\nThis is life.\nMoar and more"; let mut buf = Vec::new(); - diff_result(old, new, &mut buf); + similar_diff_result(old, new, &mut buf); let result = String::from_utf8(buf).unwrap(); println!("{}", result); }