Skip to content

Commit

Permalink
fix issues with encoding caused by using literals
Browse files Browse the repository at this point in the history
  • Loading branch information
JonRowe committed Mar 14, 2013
1 parent ef3e164 commit 511c37f
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 20 deletions.
59 changes: 39 additions & 20 deletions lib/diff/lcs/hunk.rb
Expand Up @@ -120,19 +120,19 @@ def old_diff
# Calculate item number range. Old diff range is just like a context
# diff range, except the ranges are on one line with the action between
# them.
s = "#{context_range(:old)}#{op_act[block.op]}#{context_range(:new)}\n"
s = encode("#{context_range(:old)}#{op_act[block.op]}#{context_range(:new)}\n")
# If removing anything, just print out all the remove lines in the hunk
# which is just all the remove lines in the block.
@data_old[@start_old .. @end_old].each { |e| s << "< #{e}\n" } unless block.remove.empty?
s << "---\n" if block.op == "!"
@data_new[@start_new .. @end_new].each { |e| s << "> #{e}\n" } unless block.insert.empty?
@data_old[@start_old .. @end_old].each { |e| s << encode("< ")+e+encode("\n") } unless block.remove.empty?
s << encode("---\n") if block.op == "!"
@data_new[@start_new .. @end_new].each { |e| s << encode("> ")+e+encode("\n") } unless block.insert.empty?
s
end
private :old_diff

def unified_diff
# Calculate item number range.
s = "@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n"
s = encode("@@ -#{unified_range(:old)} +#{unified_range(:new)} @@\n")

# Outlist starts containing the hunk of the old file. Removing an item
# just means putting a '-' in front of it. Inserting an item requires
Expand All @@ -145,54 +145,54 @@ def unified_diff
# file -- don't take removed items into account.
lo, hi, num_added, num_removed = @start_old, @end_old, 0, 0

outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
outlist = @data_old[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') }

@blocks.each do |block|
block.remove.each do |item|
op = item.action.to_s # -
offset = item.position - lo + num_added
outlist[offset].gsub!(/^ /, op.to_s)
match_encoding_gsub!(outlist[offset],'^ ', op.to_s)
num_removed += 1
end
block.insert.each do |item|
op = item.action.to_s # +
offset = item.position - @start_new + num_removed
outlist[offset, 0] = "#{op}#{@data_new[item.position]}"
outlist[offset, 0] = encode(op)+@data_new[item.position]
num_added += 1
end
end

s << outlist.join("\n")
s << outlist.join(encode("\n"))
end
private :unified_diff

def context_diff
s = "***************\n"
s << "*** #{context_range(:old)} ****\n"
s = encode("***************\n")
s << encode("*** #{context_range(:old)} ****\n")
r = context_range(:new)

# Print out file 1 part for each block in context diff format if there
# are any blocks that remove items
lo, hi = @start_old, @end_old
removes = @blocks.select { |e| not e.remove.empty? }
if removes
outlist = @data_old[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
outlist = @data_old[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') }
removes.each do |block|
block.remove.each do |item|
outlist[item.position - lo].gsub!(/^ /) { block.op } # - or !
match_encoding_gsub!( outlist[item.position - lo], '^ ') { block.op } # - or !
end
end
s << outlist.join("\n")
end

s << "\n--- #{r} ----\n"
s << encode("\n--- #{r} ----\n")
lo, hi = @start_new, @end_new
inserts = @blocks.select { |e| not e.insert.empty? }
if inserts
outlist = @data_new[lo .. hi].collect { |e| e.gsub(/^/, ' ') }
outlist = @data_new[lo .. hi].collect { |e| match_encoding_gsub(e,'^', ' ') }
inserts.each do |block|
block.insert.each do |item|
outlist[item.position - lo].gsub!(/^ /) { block.op } # + or !
match_encoding_gsub!( outlist[item.position - lo], '^ ') { block.op } # + or !
end
end
s << outlist.join("\n")
Expand All @@ -206,14 +206,14 @@ def ed_diff(format)
warn "Expecting only one block in an old diff hunk!" if @blocks.size > 1

if format == :reverse_ed
s = "#{op_act[@blocks[0].op]}#{context_range(:old)}\n"
s = encode("#{op_act[@blocks[0].op]}#{context_range(:old)}\n")
else
s = "#{context_range(:old).gsub(/,/, ' ')}#{op_act[@blocks[0].op]}\n"
s = encode("#{match_encoding_gsub(context_range(:old), ',', ' ')}#{op_act[@blocks[0].op]}\n")
end

unless @blocks[0].insert.empty?
@data_new[@start_new .. @end_new].each { |e| s << "#{e}\n" }
s << ".\n"
@data_new[@start_new .. @end_new].each { |e| s << e+encode("\n") }
s << encode(".\n")
end
s
end
Expand Down Expand Up @@ -249,4 +249,23 @@ def unified_range(mode)
(length == 1) ? "#{first}" : "#{first},#{length}"
end
private :unified_range

# Re-encodes +literal+ into the encoding of the data being diffed, so that
# literals built in this file can be concatenated with the data lines.
#
# The reference encoding is taken from the first old line. When the old side
# has no first element (for example, it is empty) the first new line is used
# instead, rather than calling +encoding+ on nil. If neither side offers an
# element that reports an encoding, +literal+ is returned unchanged.
#
# literal:: the String to convert.
#
# Returns the converted String, or +literal+ itself when no reference
# encoding is available.
def encode(literal)
  reference = @data_old[0] || @data_new[0]
  return literal unless reference.respond_to?(:encoding)

  literal.encode(reference.encoding)
end
# Converts every element of +args+ to the encoding of +string+.
#
# string:: the String whose encoding is the conversion target.
# args::   a collection of Strings to convert.
#
# Returns a new Array holding the converted Strings, in the same order.
def encode_to(string, args)
  args.each_with_object([]) do |piece, converted|
    converted << piece.encode(string.encoding)
  end
end
private :encode_to

# Non-destructive gsub whose arguments are first converted to the encoding
# of +string+ (via encode_to), so they can be used with it.
#
# string:: the String to search; it is not modified.
# args::   the pattern and optional replacement, given as Strings.
# block::  optional block forwarded to String#gsub.
#
# Returns whatever String#gsub returns for the converted arguments.
#
# NOTE(review): the pattern is handed to gsub as a String, and String#gsub
# matches String patterns literally. Callers passing '^' or '^ ' therefore
# match a literal caret, not the start of a line -- confirm this is intended.
def match_encoding_gsub(string, *args, &block)
  converted = encode_to(string, args)
  string.gsub(*converted, &block)
end
private :match_encoding_gsub

# In-place gsub! whose arguments are first converted to the encoding of
# +string+ (via encode_to), so they can be used with it.
#
# string:: the String to modify in place.
# args::   the pattern and optional replacement, given as Strings.
# block::  optional block forwarded to String#gsub!.
#
# Returns whatever String#gsub! returns (nil when nothing was replaced).
#
# NOTE(review): the pattern is handed to gsub! as a String, and String#gsub!
# matches String patterns literally. Callers passing '^ ' therefore match a
# literal caret followed by a space, not a leading space -- confirm this is
# intended.
def match_encoding_gsub!(string, *args, &block)
  converted = encode_to(string, args)
  string.gsub!(*converted, &block)
end
private :match_encoding_gsub!

end
57 changes: 57 additions & 0 deletions spec/hunk_spec.rb
@@ -0,0 +1,57 @@
# -*- ruby encoding: utf-8 -*-

require 'spec_helper'

# Exercises Diff::LCS::Hunk#diff with UTF-16LE encoded input for the
# :unified, :context, :old and :reverse_ed formats.
describe "Diff::LCS::Hunk" do

# One-line old/new inputs that differ only in accented characters, both
# encoded as UTF-16LE.
let(:old_data) { ["Tu avec carté {count} itém has".encode('UTF-16LE')] }
let(:new_data) { ["Tu avec carte {count} item has".encode('UTF-16LE')] }
# Diff between the two inputs; only the first element is used below.
let(:peices) { Diff::LCS.diff old_data, new_data }
# Hunk built from the first diff piece.
let(:hunk) { Diff::LCS::Hunk.new(old_data, new_data, peices[0], 3, 0) }

# Each example encodes its expected heredoc as UTF-16LE and chomps the
# trailing newline. The comparison is done with == and the boolean result is
# matched against true, so the matcher only ever sees true or false.
#
# NOTE(review): as shown here the expected old line carries no '-' marker.
# Leading whitespace may have been lost from this view -- confirm against the
# intended unified format.
it 'should be able to produce a unified diff from the two peices' do
expected =
(<<-EOD.encode('UTF-16LE').chomp)
@@ -1,2 +1,2 @@
Tu avec carté {count} itém has
+Tu avec carte {count} item has
EOD
expect(hunk.diff(:unified).to_s == expected).to eql true
end

# NOTE(review): as shown here neither expected data line carries a change
# marker -- confirm against the intended context format.
it 'should be able to produce a context diff from the two peices' do
expected =
(<<-EOD.encode('UTF-16LE').chomp)
***************
*** 1,2 ****
Tu avec carté {count} itém has
--- 1,2 ----
Tu avec carte {count} item has
EOD
expect(hunk.diff(:context).to_s == expected).to eql true
end

# Old-style diff: a range line, '<' old lines, a '---' separator, '>' new lines.
it 'should be able to produce an old diff from the two peices' do
expected =
(<<-EOD.encode('UTF-16LE').chomp)
1,2c1,2
< Tu avec carté {count} itém has
---
> Tu avec carte {count} item has
EOD
expect(hunk.diff(:old).to_s == expected).to eql true
end

# Reverse ed script: a command line, the new lines, then a terminating '.'.
it 'should be able to produce a reverse ed diff from the two peices' do
expected =
(<<-EOD.encode('UTF-16LE').chomp)
c1,2
Tu avec carte {count} item has
.
EOD
expect(hunk.diff(:reverse_ed).to_s == expected).to eql true
end

end

0 comments on commit 511c37f

Please sign in to comment.