Permalink
Browse files

Handle fuzzy diff'ing of arrays

  • Loading branch information...
andreyvit committed Apr 17, 2012
1 parent 2c0305c commit 429a86c89c95e782a76136690a1f877d6dd3f085
Showing with 173 additions and 40 deletions.
  1. +123 −22 lib/index.iced
  2. +50 −18 test/diff_test.coffee
View
@@ -3,66 +3,167 @@
# Returns `typeof obj`, except that an 'object' whose constructor is `Array` is
# reported as 'array'. The removed line compared the constructor with the
# string 'Array' instead of the Array function itself.
# NOTE(review): `typeof null` is 'object', so `obj.constructor` is read on null
# here — looks like a null argument would throw; confirm against callers.
extendedTypeOf = (obj) ->
result = typeof obj
- if result is 'object' and obj.constructor is 'Array'
+ if result is 'object' and obj.constructor is Array
'array'
else
result
+# Reports whether `obj` is compared as an atomic value rather than recursed
+# into. `typeof null` is 'object', so null is checked explicitly: it has no
+# keys or constructor to inspect and must be treated as a scalar.
+isScalar = (obj) -> (obj is null) or (typeof obj isnt 'object')
-emptyDiffStats = ->
- matchedKeys: 0
- unmatchedKeys: 0
-
# Compares two objects key by key. In the added lines it returns a
# `[score, result]` pair; the removed lines returned only the result and
# counted matched/unmatched keys in an optional `stats` argument.
-objectDiff = (obj1, obj2, stats) ->
+objectDiff = (obj1, obj2) ->
result = {}
+ score = 0
# NOTE(review): keys1 and keys2 are not referenced again in the lines shown here.
keys1 = Object.keys(obj1)
keys2 = Object.keys(obj2)
# Own keys of obj1 that are not found in obj2 are recorded as `<key>__deleted`
# and cost 30 points each.
# NOTE(review): CoffeeScript `of` compiles to JavaScript `in`, which presumably
# also matches inherited property names — confirm whether that matters here.
for own key, value1 of obj1 when !(key of obj2)
result["#{key}__deleted"] = value1
- stats.unmatchedKeys++ if stats
+ score -= 30
# Own keys of obj2 that are not found in obj1 are recorded as `<key>__added`
# and also cost 30 points each.
for own key, value2 of obj2 when !(key of obj1)
result["#{key}__added"] = value2
- stats.unmatchedKeys++ if stats
+ score -= 30
# Keys found on both sides earn 20 points and are diffed recursively.
for own key, value1 of obj1 when key of obj2
+ score += 20
value2 = obj2[key]
- if change = diff(value1, value2)
+ [subscore, change] = diffWithScore(value1, value2)
+ if change
result[key] = change
- stats.matchedKeys++ if stats
- else
- stats.matchedKeys++ if stats
+ # console.log "key #{key} subscore=#{subscore}"
# The recursive subscore is divided by 5 and clamped to [-10, 20] before it is added.
+ score += Math.min(20, Math.max(-10, subscore / 5)) # BATMAN!
# Nothing recorded: the result becomes undefined and the score is 100 per key
# of obj1 (so 0 when obj1 has no keys). Otherwise the score is floored at 0.
if Object.keys(result).length is 0
- return undefined
+ [score, result] = [100 * Object.keys(obj1).length, undefined]
+ else
+ score = Math.max(0, score)
+
+ # console.log "objectDiff(#{JSON.stringify(obj1, null, 2)} <=> #{JSON.stringify(obj2, null, 2)}) == #{JSON.stringify([score, result])}"
+
+ return [score, result]
- return result
+
+# Searches `fuzzyOriginals` for the stored value that scores highest against
+# `item` according to diffScore. The '__next' entry is skipped, and only
+# candidates with the same extendedTypeOf as `item` are scored. Returns
+# `{ score, key }` for the best candidate, or null when none qualified; on a
+# tie the candidate seen first is kept.
+findMatchingObject = (item, fuzzyOriginals) ->
+  winner = null
+
+  for own candidateKey, candidate of fuzzyOriginals
+    continue if candidateKey is '__next'
+    continue unless extendedTypeOf(item) is extendedTypeOf(candidate)
+
+    candidateScore = diffScore(item, candidate)
+    if winner is null or candidateScore > winner.score
+      winner = { score: candidateScore, key: candidateKey }
+
+  winner
+
+
+# Turns `array` into a sequence of scalars that a sequence matcher can compare.
+# Scalar items are kept as they are. Every other item is stored in `originals`
+# and replaced by the string key it is stored under:
+#   * when `fuzzyOriginals` is given and one of its entries scores above 40
+#     against the item, that entry's key is reused, so both sequences carry
+#     the same proxy for objects considered "the same";
+#   * otherwise a fresh "__$!SCALAR<n>" key is taken from `originals.__next`.
+# Returns the new sequence; `originals` is mutated.
+scalarize = (array, originals, fuzzyOriginals) ->
+  for item in array
+    if isScalar item
+      item
+    else
+      bestMatch = null
+      if fuzzyOriginals
+        # A key already claimed by an earlier item must not be offered again:
+        # a second claim would overwrite originals[key], and the earlier item
+        # would later be diffed against the wrong object.
+        unclaimed = {}
+        for own key, candidate of fuzzyOriginals when not Object::hasOwnProperty.call(originals, key)
+          unclaimed[key] = candidate
+        bestMatch = findMatchingObject(item, unclaimed)
+
+      if bestMatch and bestMatch.score > 40
+        originals[bestMatch.key] = item
+        bestMatch.key
+      else
+        proxy = "__$!SCALAR" + originals.__next++
+        originals[proxy] = item
+        proxy
+
+# Reports whether `item` is a proxy key stored in `originals`.
+# Only own keys count: CoffeeScript's `of` compiles to JavaScript's `in`, which
+# also matches inherited names such as 'toString' or 'constructor', so a plain
+# string element with such a value would be mistaken for a proxy. The '__next'
+# counter lives in the same object but is bookkeeping, not a proxy.
+isScalarized = (item, originals) ->
+  (typeof item is 'string') and (item isnt '__next') and Object::hasOwnProperty.call(originals, item)
+
+# Maps a sequence element back to the value it stands for: a proxy key yields
+# the value stored under it in `originals`, any other element is returned
+# unchanged.
+descalarize = (item, originals) ->
+  return item unless isScalarized(item, originals)
+  originals[item]
# Diffs two arrays and, in the added lines, returns a `[score, result]` pair.
# Both arrays are first converted to sequences of scalars by scalarize (the
# second one is given the first one's stored originals for fuzzy matching), the
# sequences are compared with SequenceMatcher, and every opcode is translated
# into ['='], ['~', change], ['-', item] or ['+', item] entries.
# NOTE(review): `stats` is not used anywhere in the added lines.
arrayDiff = (obj1, obj2, stats) ->
- opcodes = new SequenceMatcher(null, "a", "ab").getOpcodes()
- console.log opcodes
- return
+ originals1 = { __next: 1 }
+ seq1 = scalarize(obj1, originals1)
# originals2 continues the proxy counter from where originals1 stopped.
+ originals2 = { __next: originals1.__next }
+ seq2 = scalarize(obj2, originals2, originals1)
+
+ opcodes = new SequenceMatcher(null, seq1, seq2).getOpcodes()
+
+ # console.log "arrayDiff:\nobj1 = #{JSON.stringify(obj1, null, 2)}\nobj2 = #{JSON.stringify(obj2, null, 2)}\nseq1 = #{JSON.stringify(seq1, null, 2)}\nseq2 = #{JSON.stringify(seq2, null, 2)}\nopcodes = #{JSON.stringify(opcodes, null, 2)}"
+
+ result = []
+ score = 0
+
# allEqual stays true only if every opcode is 'equal' and no matched item has a change.
+ allEqual = yes
+ for [op, i1, i2, j1, j2] in opcodes
+ if op isnt 'equal'
+ allEqual = no
+
+ switch op
+ when 'equal'
+ for i in [i1 ... i2]
+ item = seq1[i]
# A proxy key in an 'equal' run stands for two stored values, one per side;
# they are diffed to tell an unchanged item ['='] from a modified one ['~', change].
+ if isScalarized(item, originals1)
+ unless isScalarized(item, originals2)
# NOTE(review): AssertionError is not defined in the lines shown here — confirm it is in scope.
+ throw new AssertionError("internal bug: isScalarized(item, originals1) != isScalarized(item, originals2) for item #{JSON.stringify(item)}")
+ item1 = descalarize(item, originals1)
+ item2 = descalarize(item, originals2)
+ change = diff(item1, item2)
+ if change
+ result.push ['~', change]
+ allEqual = no
+ else
+ result.push ['=']
+ else
+ result.push ['=', item]
+ score += 10
# Deleted and inserted items are reported with their original (descalarized)
# values; the score is lowered by 5 in each of these branches.
+ when 'delete'
+ for i in [i1 ... i2]
+ result.push ['-', descalarize(seq1[i], originals1)]
+ score -= 5
+ when 'insert'
+ for j in [j1 ... j2]
+ result.push ['+', descalarize(seq2[j], originals2)]
+ score -= 5
# A 'replace' is reported as the deletions followed by the insertions.
+ when 'replace'
+ for i in [i1 ... i2]
+ result.push ['-', descalarize(seq1[i], originals1)]
+ score -= 5
+ for j in [j1 ... j2]
+ result.push ['+', descalarize(seq2[j], originals2)]
+ score -= 5
+
# No difference (or no opcodes at all): result is undefined and the score is a
# fixed 100. Otherwise the accumulated score is floored at 0.
+ if allEqual or (opcodes.length is 0)
+ result = undefined
+ score = 100
+ else
+ score = Math.max(0, score)
+ return [score, result]
# Core comparison; in the added lines it returns a `[score, change]` pair.
# When both values have the same extendedTypeOf, 'object' values are handed to
# objectDiff and 'array' values to arrayDiff. Values not handled by the switch
# are compared with `!=`: differing values give score 0 and an
# `{ __old, __new }` record, equal values give score 100 and undefined.
-diff = (obj1, obj2, stats) ->
+
+diffWithScore = (obj1, obj2) ->
type1 = extendedTypeOf obj1
type2 = extendedTypeOf obj2
if type1 == type2
switch type1
when 'object'
- return objectDiff(obj1, obj2, stats)
+ return objectDiff(obj1, obj2)
when 'array'
- return arrayDiff(obj1, obj2, stats)
+ return arrayDiff(obj1, obj2)
if obj1 != obj2
- { __old: obj1, __new: obj2 }
+ [0, { __old: obj1, __new: obj2 }]
else
- undefined
+ [100, undefined]
+
+# Exported entry point: returns only the change description that
+# diffWithScore produces for the two values, dropping the score.
+diff = (obj1, obj2) ->
+  diffWithScore(obj1, obj2)[1]
+# Returns only the score that diffWithScore produces for the two values,
+# dropping the change description.
+diffScore = (obj1, obj2) ->
+  diffWithScore(obj1, obj2)[0]
module.exports = { diff }
View
@@ -4,29 +4,61 @@ assert = require 'assert'
describe 'diff', ->
- it "should return undefined for two identical numbers", ->
- assert.deepEqual undefined, diff(42, 42)
+ describe 'with simple scalar values', ->
- it "should return undefined for two identical strings", ->
- assert.deepEqual undefined, diff("foo", "foo")
+ it "should return undefined for two identical numbers", ->
+ assert.deepEqual undefined, diff(42, 42)
- it "should return { __old: <old value>, __new: <new value> } object for two different numbers", ->
- assert.deepEqual { __old: 42, __new: 10 }, diff(42, 10)
+ it "should return undefined for two identical strings", ->
+ assert.deepEqual undefined, diff("foo", "foo")
- it "should return undefined for two objects with identical contents", ->
- assert.deepEqual undefined, diff({ foo: 42, bar: 10 }, { foo: 42, bar: 10 })
+ it "should return { __old: <old value>, __new: <new value> } object for two different numbers", ->
+ assert.deepEqual { __old: 42, __new: 10 }, diff(42, 10)
- it "should return undefined for two object hierarchies with identical contents", ->
- assert.deepEqual undefined, diff({ foo: 42, bar: { bbbar: 10, bbboz: 11 } }, { foo: 42, bar: { bbbar: 10, bbboz: 11 } })
+ describe 'with objects', ->
- it "should return { <key>__deleted: <old value> } when the second object is missing a key", ->
- assert.deepEqual { foo__deleted: 42 }, diff({ foo: 42, bar: 10 }, { bar: 10 })
+ it "should return undefined for two objects with identical contents", ->
+ assert.deepEqual undefined, diff({ foo: 42, bar: 10 }, { foo: 42, bar: 10 })
- it "should return { <key>__added: <new value> } when the first object is missing a key", ->
- assert.deepEqual { foo__added: 42 }, diff({ bar: 10 }, { foo: 42, bar: 10 })
+ it "should return undefined for two object hierarchies with identical contents", ->
+ assert.deepEqual undefined, diff({ foo: 42, bar: { bbbar: 10, bbboz: 11 } }, { foo: 42, bar: { bbbar: 10, bbboz: 11 } })
- it "should return { <key>: { __old: <old value>, __new: <new value> } } for two objects with diffent scalar values for a key", ->
- assert.deepEqual { foo: { __old: 42, __new: 10 } }, diff({ foo: 42 }, { foo: 10 })
+ it "should return { <key>__deleted: <old value> } when the second object is missing a key", ->
+ assert.deepEqual { foo__deleted: 42 }, diff({ foo: 42, bar: 10 }, { bar: 10 })
- it "should return { <key>: <diff> } with a recursive diff for two objects with diffent values for a key", ->
- assert.deepEqual { bar: { bbboz__deleted: 11, bbbar: { __old: 10, __new: 12 } } }, diff({ foo: 42, bar: { bbbar: 10, bbboz: 11 }}, { foo: 42, bar: { bbbar: 12 }})
+ it "should return { <key>__added: <new value> } when the first object is missing a key", ->
+ assert.deepEqual { foo__added: 42 }, diff({ bar: 10 }, { foo: 42, bar: 10 })
+
+ it "should return { <key>: { __old: <old value>, __new: <new value> } } for two objects with diffent scalar values for a key", ->
+ assert.deepEqual { foo: { __old: 42, __new: 10 } }, diff({ foo: 42 }, { foo: 10 })
+
+ it "should return { <key>: <diff> } with a recursive diff for two objects with diffent values for a key", ->
+ assert.deepEqual { bar: { bbboz__deleted: 11, bbbar: { __old: 10, __new: 12 } } }, diff({ foo: 42, bar: { bbbar: 10, bbboz: 11 }}, { foo: 42, bar: { bbbar: 12 }})
+
+ describe 'with arrays of scalars', ->
+
+ it "should return undefined for two arrays with identical contents", ->
+ assert.deepEqual undefined, diff([10, 20, 30], [10, 20, 30])
+
+ it "should return [..., ['-', <removed item>], ...] for two arrays when the second array is missing a value", ->
+ assert.deepEqual [['=', 10], ['-', 20], ['=', 30]], diff([10, 20, 30], [10, 30])
+
+ it "should return [..., ['+', <added item>], ...] for two arrays when the second one has an extra value", ->
+ assert.deepEqual [['=', 10], ['+', 20], ['=', 30]], diff([10, 30], [10, 20, 30])
+
+ it "should return [..., ['+', <added item>]] for two arrays when the second one has an extra value at the end (edge case test)", ->
+ assert.deepEqual [['=', 10], ['=', 20], ['+', 30]], diff([10, 20], [10, 20, 30])
+
+ describe 'with arrays of objects', ->
+
+ it "should return undefined for two arrays with identical contents", ->
+ assert.deepEqual undefined, diff([{ foo: 10 }, { foo: 20 }, { foo: 30 }], [{ foo: 10 }, { foo: 20 }, { foo: 30 }])
+
+ it "should return [..., ['-', <removed item>], ...] for two arrays when the second array is missing a value", ->
+ assert.deepEqual [['='], ['-', { foo: 20 }], ['=']], diff([{ foo: 10 }, { foo: 20 }, { foo: 30 }], [{ foo: 10 }, { foo: 30 }])
+
+ it "should return [..., ['+', <added item>], ...] for two arrays when the second array has an extra value", ->
+ assert.deepEqual [['='], ['+', { foo: 20 }], ['=']], diff([{ foo: 10 }, { foo: 30 }], [{ foo: 10 }, { foo: 20 }, { foo: 30 }])
+
+ it "should return [..., ['~', <diff>], ...] for two arrays when an item has been modified (note: involves a crazy heuristic)", ->
+ assert.deepEqual [['='], ['~', { foo: { __old: 20, __new: 21 } }], ['=']], diff([{ foo: 10, bar: { bbbar: 10, bbboz: 11 } }, { foo: 20, bar: { bbbar: 50, bbboz: 25 } }, { foo: 30, bar: { bbbar: 92, bbboz: 34 } }], [{ foo: 10, bar: { bbbar: 10, bbboz: 11 } }, { foo: 21, bar: { bbbar: 50, bbboz: 25 } }, { foo: 30, bar: { bbbar: 92, bbboz: 34 } }])

0 comments on commit 429a86c

Please sign in to comment.