From 88cc76d2f45ae00df6c0ecc26a299706185d6248 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Sun, 24 Aug 2025 14:38:32 -0400 Subject: [PATCH 1/9] Improve automatic symbol pairing for nameless literals --- objdiff-core/src/diff/mod.rs | 93 ++++++++++++++++++++++++++++-------- 1 file changed, 74 insertions(+), 19 deletions(-) diff --git a/objdiff-core/src/diff/mod.rs b/objdiff-core/src/diff/mod.rs index dd10148..e0f2e94 100644 --- a/objdiff-core/src/diff/mod.rs +++ b/objdiff-core/src/diff/mod.rs @@ -588,8 +588,8 @@ fn matching_symbols( } let symbol_match = SymbolMatch { left: Some(symbol_idx), - right: find_symbol(right, left, symbol, Some(&right_used)), - prev: find_symbol(prev, left, symbol, None), + right: find_symbol(right, left, symbol_idx, Some(&right_used)), + prev: find_symbol(prev, left, symbol_idx, None), section_kind, }; matches.push(symbol_match); @@ -613,7 +613,7 @@ fn matching_symbols( matches.push(SymbolMatch { left: None, right: Some(symbol_idx), - prev: find_symbol(prev, right, symbol, None), + prev: find_symbol(prev, right, symbol_idx, None), section_kind, }); } @@ -645,6 +645,13 @@ fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str } } +fn symbol_section_name<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<&'obj str> { + if let Some((name, _kind)) = symbol_section(obj, symbol) { + return Some(name); + } + None +} + fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind { match symbol.section { Some(section_index) => obj.sections[section_index].kind, @@ -653,35 +660,81 @@ fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind { } } +/// Check if a symbol is a compiler-generated literal like @1234. +fn is_symbol_compiler_generated_literal(symbol: &Symbol) -> bool { + if !symbol.name.starts_with('@') { + return false; + } + if !symbol.name[1..].chars().all(char::is_numeric) { + // Exclude @stringBase0, @GUARD@, etc. 
+ return false; + } + return true; +} + fn find_symbol( obj: Option<&Object>, in_obj: &Object, - in_symbol: &Symbol, + in_symbol_idx: usize, used: Option<&BTreeSet<usize>>, ) -> Option<usize> { + let in_symbol = &in_obj.symbols[in_symbol_idx]; let obj = obj?; let (section_name, section_kind) = symbol_section(in_obj, in_symbol)?; - // Try to find an exact name match - if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| { - symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind - }) { - return Some(symbol_idx); - } + // Match compiler-generated symbols against each other (e.g. @251 -> @60) - // If they are at the same address in the same section - if in_symbol.name.starts_with('@') + // If they are in the same section and have the same value + if is_symbol_compiler_generated_literal(in_symbol) && matches!(section_kind, SectionKind::Data | SectionKind::Bss) - && let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| { + { + let mut closest_match_symbol_idx = None; + let mut closest_match_percent = 0.0; + for (symbol_idx, symbol) in unmatched_symbols(obj, used) { let Some(section_index) = symbol.section else { - return false; + continue; }; - symbol.name.starts_with('@') - && symbol.address == in_symbol.address - && obj.sections[section_index].name == section_name - }) - { + if obj.sections[section_index].name != section_name { + continue; + } + if !is_symbol_compiler_generated_literal(symbol) { + continue; + } + match section_kind { + SectionKind::Data => { + // For data, we try to pick the first symbol that matches 100%. + // If no symbol is a perfect match, pick whichever matches the closest. 
+ if let Ok((left_diff, _right_diff)) = + diff_data_symbol(in_obj, obj, in_symbol_idx, symbol_idx) + && let Some(match_percent) = left_diff.match_percent + && match_percent > closest_match_percent + { + closest_match_symbol_idx = Some(symbol_idx); + closest_match_percent = match_percent; + if match_percent == 100.0 { + break; + } + } + } + SectionKind::Bss => { + // For BSS, we simply pick the first symbol that has the exact matching size. + if in_symbol.size == symbol.size { + closest_match_symbol_idx = Some(symbol_idx); + break; + } + } + _ => unreachable!(), + } + } + return closest_match_symbol_idx; + } + + // Try to find an exact name match + if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|(_, symbol)| { + symbol.name == in_symbol.name && symbol_section_kind(obj, symbol) == section_kind + }) { return Some(symbol_idx); } + // Match Metrowerks symbol$1234 against symbol$2345 if let Some((prefix, suffix)) = in_symbol.name.split_once('$') { if !suffix.chars().all(char::is_numeric) { @@ -692,6 +745,7 @@ fn find_symbol( prefix == p && s.chars().all(char::is_numeric) && symbol_section_kind(obj, symbol) == section_kind + && symbol_section_name(obj, symbol) == Some(section_name) } else { false } @@ -699,6 +753,7 @@ fn find_symbol( return Some(symbol_idx); } } + None } From f8e7478c9288bff4e993e864c99f6d69bb26d511 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Sun, 24 Aug 2025 14:40:11 -0400 Subject: [PATCH 2/9] Fix data reloc diffing when the reloc points to an in-function static symbol --- objdiff-core/src/diff/data.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/objdiff-core/src/diff/data.rs b/objdiff-core/src/diff/data.rs index ecf87f2..5818230 100644 --- a/objdiff-core/src/diff/data.rs +++ b/objdiff-core/src/diff/data.rs @@ -35,6 +35,22 @@ pub fn diff_bss_symbol( )) } +fn symbol_name_matches(left_name: &str, right_name: &str) -> bool { + // Match Metrowerks symbol$1234 against symbol$2345 + if let 
Some((prefix, suffix)) = left_name.split_once('$') { + if !suffix.chars().all(char::is_numeric) { + return false; + } + if let Some((p, s)) = right_name.split_once('$') { + prefix == p && s.chars().all(char::is_numeric) + } else { + false + } + } else { + left_name == right_name + } +} + fn reloc_eq( left_obj: &Object, right_obj: &Object, @@ -45,8 +61,8 @@ fn reloc_eq( return false; } - let symbol_name_addend_matches = - left.symbol.name == right.symbol.name && left.relocation.addend == right.relocation.addend; + let symbol_name_addend_matches = symbol_name_matches(&left.symbol.name, &right.symbol.name) + && left.relocation.addend == right.relocation.addend; match (left.symbol.section, right.symbol.section) { (Some(sl), Some(sr)) => { // Match if section and name+addend or address match From 40f779116005a26831097ac9856d6a23742cc029 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Sun, 24 Aug 2025 14:51:44 -0400 Subject: [PATCH 3/9] Only pair up literals that match perfectly --- objdiff-core/src/diff/mod.rs | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/objdiff-core/src/diff/mod.rs b/objdiff-core/src/diff/mod.rs index e0f2e94..abed844 100644 --- a/objdiff-core/src/diff/mod.rs +++ b/objdiff-core/src/diff/mod.rs @@ -687,8 +687,7 @@ fn find_symbol( if is_symbol_compiler_generated_literal(in_symbol) && matches!(section_kind, SectionKind::Data | SectionKind::Bss) { - let mut closest_match_symbol_idx = None; - let mut closest_match_percent = 0.0; + let mut matching_symbol_idx = None; for (symbol_idx, symbol) in unmatched_symbols(obj, used) { let Some(section_index) = symbol.section else { continue; @@ -701,31 +700,27 @@ fn find_symbol( } match section_kind { SectionKind::Data => { - // For data, we try to pick the first symbol that matches 100%. - // If no symbol is a perfect match, pick whichever matches the closest. + // For data, pick the first symbol that has the exact matching bytes and relocations. 
if let Ok((left_diff, _right_diff)) = diff_data_symbol(in_obj, obj, in_symbol_idx, symbol_idx) && let Some(match_percent) = left_diff.match_percent - && match_percent > closest_match_percent + && match_percent == 100.0 { - closest_match_symbol_idx = Some(symbol_idx); - closest_match_percent = match_percent; - if match_percent == 100.0 { - break; - } + matching_symbol_idx = Some(symbol_idx); + break; } } SectionKind::Bss => { - // For BSS, we simply pick the first symbol that has the exact matching size. + // For BSS, pick the first symbol that has the exact matching size. if in_symbol.size == symbol.size { - closest_match_symbol_idx = Some(symbol_idx); + matching_symbol_idx = Some(symbol_idx); break; } } _ => unreachable!(), } } - return closest_match_symbol_idx; + return matching_symbol_idx; } // Try to find an exact name match From 81163a6f378eafb6e2edce24f374e617109785ab Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Sun, 24 Aug 2025 15:45:27 -0400 Subject: [PATCH 4/9] Clippy --- objdiff-core/src/diff/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/objdiff-core/src/diff/mod.rs b/objdiff-core/src/diff/mod.rs index abed844..4756f41 100644 --- a/objdiff-core/src/diff/mod.rs +++ b/objdiff-core/src/diff/mod.rs @@ -669,7 +669,7 @@ fn is_symbol_compiler_generated_literal(symbol: &Symbol) -> bool { // Exclude @stringBase0, @GUARD@, etc. 
return false; } - return true; + true } fn find_symbol( From 8e4615e1a364ef6bc6a3ae7920aa186b2bd397dd Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Sun, 24 Aug 2025 16:33:30 -0400 Subject: [PATCH 5/9] Do two separate passes when pairing up literals --- objdiff-core/src/diff/mod.rs | 106 +++++++++++++++++++++-------------- 1 file changed, 63 insertions(+), 43 deletions(-) diff --git a/objdiff-core/src/diff/mod.rs b/objdiff-core/src/diff/mod.rs index 4756f41..022b6d6 100644 --- a/objdiff-core/src/diff/mod.rs +++ b/objdiff-core/src/diff/mod.rs @@ -575,47 +575,57 @@ fn matching_symbols( &mut matches, )?; } - for (symbol_idx, symbol) in left.symbols.iter().enumerate() { - if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) { - continue; - } - let section_kind = symbol_section_kind(left, symbol); - if section_kind == SectionKind::Unknown { - continue; - } - if left_used.contains(&symbol_idx) { - continue; - } - let symbol_match = SymbolMatch { - left: Some(symbol_idx), - right: find_symbol(right, left, symbol_idx, Some(&right_used)), - prev: find_symbol(prev, left, symbol_idx, None), - section_kind, - }; - matches.push(symbol_match); - if let Some(right) = symbol_match.right { - right_used.insert(right); + // Do two passes for nameless literals. The first only pairs up perfect matches to ensure + // those are correct first, while the second pass catches near matches. 
+ for fuzzy_literals in [false, true] { + for (symbol_idx, symbol) in left.symbols.iter().enumerate() { + if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) { + continue; + } + let section_kind = symbol_section_kind(left, symbol); + if section_kind == SectionKind::Unknown { + continue; + } + if left_used.contains(&symbol_idx) { + continue; + } + let symbol_match = SymbolMatch { + left: Some(symbol_idx), + right: find_symbol(right, left, symbol_idx, Some(&right_used), fuzzy_literals), + prev: find_symbol(prev, left, symbol_idx, None, fuzzy_literals), + section_kind, + }; + matches.push(symbol_match); + left_used.insert(symbol_idx); + if let Some(right) = symbol_match.right { + right_used.insert(right); + } } } } if let Some(right) = right { - for (symbol_idx, symbol) in right.symbols.iter().enumerate() { - if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) { - continue; - } - let section_kind = symbol_section_kind(right, symbol); - if section_kind == SectionKind::Unknown { - continue; - } - if right_used.contains(&symbol_idx) { - continue; + // Do two passes for nameless literals. The first only pairs up perfect matches to ensure + // those are correct first, while the second pass catches near matches. 
+ for fuzzy_literals in [false, true] { + for (symbol_idx, symbol) in right.symbols.iter().enumerate() { + if symbol.size == 0 || symbol.flags.contains(SymbolFlag::Ignored) { + continue; + } + let section_kind = symbol_section_kind(right, symbol); + if section_kind == SectionKind::Unknown { + continue; + } + if right_used.contains(&symbol_idx) { + continue; + } + matches.push(SymbolMatch { + left: None, + right: Some(symbol_idx), + prev: find_symbol(prev, right, symbol_idx, None, fuzzy_literals), + section_kind, + }); + right_used.insert(symbol_idx); } - matches.push(SymbolMatch { - left: None, - right: Some(symbol_idx), - prev: find_symbol(prev, right, symbol_idx, None), - section_kind, - }); } } Ok(matches) @@ -677,6 +687,7 @@ fn find_symbol( in_obj: &Object, in_symbol_idx: usize, used: Option<&BTreeSet<usize>>, + fuzzy_literals: bool, ) -> Option<usize> { let in_symbol = &in_obj.symbols[in_symbol_idx]; let obj = obj?; @@ -687,7 +698,8 @@ fn find_symbol( if is_symbol_compiler_generated_literal(in_symbol) && matches!(section_kind, SectionKind::Data | SectionKind::Bss) { - let mut matching_symbol_idx = None; + let mut closest_match_symbol_idx = None; + let mut closest_match_percent = 0.0; for (symbol_idx, symbol) in unmatched_symbols(obj, used) { let Some(section_index) = symbol.section else { continue; @@ -700,27 +712,35 @@ fn find_symbol( } match section_kind { SectionKind::Data => { - // For data, pick the first symbol that has the exact matching bytes and relocations. + // For data, pick the first symbol with exactly matching bytes and relocations. + // If no symbols match exactly, and `fuzzy_literals` is true, pick the closest + // plausible match instead. 
if let Ok((left_diff, _right_diff)) = diff_data_symbol(in_obj, obj, in_symbol_idx, symbol_idx) && let Some(match_percent) = left_diff.match_percent - && match_percent == 100.0 + && (match_percent == 100.0 + || (fuzzy_literals + && match_percent >= 50.0 + && match_percent > closest_match_percent)) { - matching_symbol_idx = Some(symbol_idx); - break; + closest_match_symbol_idx = Some(symbol_idx); + closest_match_percent = match_percent; + if match_percent == 100.0 { + break; + } } } SectionKind::Bss => { // For BSS, pick the first symbol that has the exact matching size. if in_symbol.size == symbol.size { - matching_symbol_idx = Some(symbol_idx); + closest_match_symbol_idx = Some(symbol_idx); break; } } _ => unreachable!(), } } - return matching_symbol_idx; + return closest_match_symbol_idx; } // Try to find an exact name match From 5b8009e89f6feef1a3d8e38cd14964fb384ab6e1 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Tue, 26 Aug 2025 23:18:16 -0400 Subject: [PATCH 6/9] Fix partially-matching literal pairups not working right --- objdiff-core/src/diff/mod.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/objdiff-core/src/diff/mod.rs b/objdiff-core/src/diff/mod.rs index 022b6d6..005a0ee 100644 --- a/objdiff-core/src/diff/mod.rs +++ b/objdiff-core/src/diff/mod.rs @@ -596,8 +596,8 @@ fn matching_symbols( section_kind, }; matches.push(symbol_match); - left_used.insert(symbol_idx); if let Some(right) = symbol_match.right { + left_used.insert(symbol_idx); right_used.insert(right); } } @@ -618,13 +618,16 @@ fn matching_symbols( if right_used.contains(&symbol_idx) { continue; } - matches.push(SymbolMatch { + let symbol_match = SymbolMatch { left: None, right: Some(symbol_idx), prev: find_symbol(prev, right, symbol_idx, None, fuzzy_literals), section_kind, - }); - right_used.insert(symbol_idx); + }; + matches.push(symbol_match); + if symbol_match.prev.is_some() { + right_used.insert(symbol_idx); + } } } } From 
880758088e5505a66df2fd0186db74b03da5c71c Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Sat, 30 Aug 2025 14:27:38 -0400 Subject: [PATCH 7/9] Remove duplicate $ splitting code --- objdiff-core/src/diff/data.rs | 2 +- objdiff-core/src/diff/mod.rs | 25 +++++++------------------ 2 files changed, 8 insertions(+), 19 deletions(-) diff --git a/objdiff-core/src/diff/data.rs b/objdiff-core/src/diff/data.rs index 5818230..6e9312c 100644 --- a/objdiff-core/src/diff/data.rs +++ b/objdiff-core/src/diff/data.rs @@ -35,7 +35,7 @@ pub fn diff_bss_symbol( )) } -fn symbol_name_matches(left_name: &str, right_name: &str) -> bool { +pub fn symbol_name_matches(left_name: &str, right_name: &str) -> bool { // Match Metrowerks symbol$1234 against symbol$2345 if let Some((prefix, suffix)) = left_name.split_once('$') { if !suffix.chars().all(char::is_numeric) { diff --git a/objdiff-core/src/diff/mod.rs b/objdiff-core/src/diff/mod.rs index 005a0ee..224af68 100644 --- a/objdiff-core/src/diff/mod.rs +++ b/objdiff-core/src/diff/mod.rs @@ -13,7 +13,7 @@ use crate::{ code::{diff_code, no_diff_code}, data::{ diff_bss_section, diff_bss_symbol, diff_data_section, diff_data_symbol, - diff_generic_section, no_diff_bss_section, no_diff_data_section, + diff_generic_section, no_diff_bss_section, no_diff_data_section, symbol_name_matches, }, }, obj::{InstructionRef, Object, Relocation, SectionKind, Symbol, SymbolFlag}, @@ -753,23 +753,12 @@ fn find_symbol( return Some(symbol_idx); } - // Match Metrowerks symbol$1234 against symbol$2345 - if let Some((prefix, suffix)) = in_symbol.name.split_once('$') { - if !suffix.chars().all(char::is_numeric) { - return None; - } - if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| { - if let Some((p, s)) = symbol.name.split_once('$') { - prefix == p - && s.chars().all(char::is_numeric) - && symbol_section_kind(obj, symbol) == section_kind - && symbol_section_name(obj, symbol) == Some(section_name) - } else { - false - } - }) { - return 
Some(symbol_idx); - } + if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| { + symbol_name_matches(&in_symbol.name, &symbol.name) + && symbol_section_kind(obj, symbol) == section_kind + && symbol_section_name(obj, symbol) == Some(section_name) + }) { + return Some(symbol_idx); } None From 3e742254022b0752cb9c47d7e2738b22aa2a3ac1 Mon Sep 17 00:00:00 2001 From: LagoLunatic Date: Sat, 30 Aug 2025 14:46:19 -0400 Subject: [PATCH 8/9] Implement $ splitting for section names too --- objdiff-core/src/diff/mod.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/objdiff-core/src/diff/mod.rs b/objdiff-core/src/diff/mod.rs index 224af68..d26d081 100644 --- a/objdiff-core/src/diff/mod.rs +++ b/objdiff-core/src/diff/mod.rs @@ -650,7 +650,13 @@ where fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str, SectionKind)> { if let Some(section) = symbol.section.and_then(|section_idx| obj.sections.get(section_idx)) { - Some((section.name.as_str(), section.kind)) + // Match x86 .rdata$r against .rdata$rs + let section_name = if let Some((prefix, _)) = section.name.split_once('$') { + prefix + } else { + section.name.as_str() + }; + Some((section_name, section.kind)) } else if symbol.flags.contains(SymbolFlag::Common) { Some((".comm", SectionKind::Common)) } else { From c6437b4160ad5afbc8ac8c24785bba61bc7a710a Mon Sep 17 00:00:00 2001 From: Luke Street Date: Sat, 30 Aug 2025 23:04:57 -0600 Subject: [PATCH 9/9] Minor cleanup --- objdiff-core/src/diff/data.rs | 8 +++----- objdiff-core/src/diff/mod.rs | 16 +++------------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/objdiff-core/src/diff/data.rs b/objdiff-core/src/diff/data.rs index 6e9312c..5d08195 100644 --- a/objdiff-core/src/diff/data.rs +++ b/objdiff-core/src/diff/data.rs @@ -41,11 +41,9 @@ pub fn symbol_name_matches(left_name: &str, right_name: &str) -> bool { if !suffix.chars().all(char::is_numeric) { return false; } - if let Some((p, 
s)) = right_name.split_once('$') { - prefix == p && s.chars().all(char::is_numeric) - } else { - false - } + right_name + .split_once('$') + .is_some_and(|(p, s)| p == prefix && s.chars().all(char::is_numeric)) } else { left_name == right_name } diff --git a/objdiff-core/src/diff/mod.rs b/objdiff-core/src/diff/mod.rs index d26d081..c7a6688 100644 --- a/objdiff-core/src/diff/mod.rs +++ b/objdiff-core/src/diff/mod.rs @@ -651,11 +651,8 @@ where fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str, SectionKind)> { if let Some(section) = symbol.section.and_then(|section_idx| obj.sections.get(section_idx)) { // Match x86 .rdata$r against .rdata$rs - let section_name = if let Some((prefix, _)) = section.name.split_once('$') { - prefix - } else { - section.name.as_str() - }; + let section_name = + section.name.split_once('$').map_or(section.name.as_str(), |(prefix, _)| prefix); Some((section_name, section.kind)) } else if symbol.flags.contains(SymbolFlag::Common) { Some((".comm", SectionKind::Common)) @@ -664,13 +661,6 @@ fn symbol_section<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<(&'obj str } } -fn symbol_section_name<'obj>(obj: &'obj Object, symbol: &Symbol) -> Option<&'obj str> { - if let Some((name, _kind)) = symbol_section(obj, symbol) { - return Some(name); - } - None -} - fn symbol_section_kind(obj: &Object, symbol: &Symbol) -> SectionKind { match symbol.section { Some(section_index) => obj.sections[section_index].kind, @@ -762,7 +752,7 @@ fn find_symbol( if let Some((symbol_idx, _)) = unmatched_symbols(obj, used).find(|&(_, symbol)| { symbol_name_matches(&in_symbol.name, &symbol.name) && symbol_section_kind(obj, symbol) == section_kind - && symbol_section_name(obj, symbol) == Some(section_name) + && symbol_section(obj, symbol).is_some_and(|(name, _)| name == section_name) }) { return Some(symbol_idx); }