Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Branch: master
Fetching contributors…

Cannot retrieve contributors at this time

executable file 359 lines (288 sloc) 9.104 kB
#!/usr/bin/env ruby
require 'fileutils'
require 'date'
require './util'
# Override tables consulted by Commit; all are keyed by the original git id.
$commit_authors = {}      # git id -> [name, email]
$commit_svn_authors = {}  # git id -> [name, email] taken from 'svn-authors'
$commit_committers = {}   # git id -> [name, email]
$commit_dates = {}        # git id -> DateTime
$commit_messages = {}     # git id -> replacement commit message
# Filled in while commits are exported.
$commit_map_git = {}      # original git id -> rewritten git id
$commit_map_mtn = {}      # monotone revision id -> rewritten git id
$author_map = {}          # left side of 'authors-map' -> right side
$branches_map = {}        # left side of 'branches-map' -> right side
@svn_map = {}             # svn revision -> list of git ids
@map = {}                 # svn author id -> [name, email]
@marks = {}               # mark -> monotone revision id
@mtn_map = {}             # monotone revision id -> git id

# File.foreach is used throughout so that every input file is closed as soon
# as it has been read (File.open(...).each left the handle open).

# 'authors-map' lines have the form "<id> = <author>".
File.foreach('authors-map') do |l|
  id, author = l.chomp.split(' = ')
  $author_map[id] = author
end

# 'marks-mtn' lines are "<mark> <monotone id>".
File.foreach('marks-mtn') do |l|
  mark, id = l.split
  @marks[mark] = id
end

# 'marks-git' lines are "<mark> <git id>"; joining both mark files on the
# mark gives the monotone id -> git id mapping. Marks unknown to 'marks-mtn'
# are skipped.
File.foreach('marks-git') do |l|
  mark, git_id = l.split
  mtn_id = @marks[mark]
  next if not mtn_id
  @mtn_map[mtn_id] = git_id
end

# 'svn-authors-map' lines are "<id> = <name> <<email>>"; lines that do not
# match that shape are ignored.
File.foreach('svn-authors-map') do |l|
  l.scan(/(.+) = (.+) <(.+)>/) do |id,name,email|
    @map[id] = [name, email]
  end
end

# get svn-rev -> git-sha1 mapping
# NOTE(review): a monotone id missing from @mtn_map adds nil to the list.
File.foreach('svn-revisions') do |l|
  svn_id, mtn_id = l.split
  git_id = @mtn_map[mtn_id]
  @svn_map[svn_id] ||= []
  @svn_map[svn_id] << git_id
end

# convert svn path authors to something useful
File.foreach('svn-authors') do |l|
  rev, author = l.chomp.split(' ', 2)
  # A leading '#' marks the author as an id to look up in @map.
  mark = author[0] == '#'[0]
  if not mark
    # Drop everything from the first " (" (or "(") onwards.
    author.gsub!(/ ?\(.+/,'')
  else
    # Strip the marker and the character that follows it.
    author = author[2..-1]
  end
  if author =~ /^(.+) <(.+)>/
    name, email = $1, $2
  elsif mark and @map[author]
    name, email = @map[author]
  else
    name, email = author, 'unknown'
  end
  # NOTE(review): raises NoMethodError when rev is absent from 'svn-revisions'.
  @svn_map[rev].each do |id|
    $commit_svn_authors[id] = [name, email]
  end
end

# 'branches-map' lines have the form "<original> = <mapped>".
File.foreach('branches-map') do |l|
  orig, map = l.chomp.split(" = ")
  $branches_map[orig] = map
end
# Registers manual overrides for the commit that corresponds to monotone
# revision +rev+ (translated to a git id through @mtn_map).
#
# author / committer - strings of the form "Name <email>"; text that does not
#                      match that shape leaves the respective override alone.
# An optional block supplies a replacement commit message.
#
# Returns the block's value when a block is given, nil otherwise.
def fix_commit(rev, author = '', committer = '')
  git_id = @mtn_map[rev]
  identity = /(.+) <(.+)>/

  [[author, $commit_authors], [committer, $commit_committers]].each do |text, table|
    text.scan(identity) { |pair| table[git_id] = pair }
  end

  $commit_messages[git_id] = yield if block_given?
end
# Decodes a hex dump of ISO-8859-1 bytes.
#
# value - String of hexadecimal digits.
#
# Returns the decoded text as a UTF-8 String.
def extract_text(value)
  raw_bytes = [value].pack('H*')
  raw_bytes.encode(Encoding::UTF_8, Encoding::ISO_8859_1)
end
# Fills the override tables ($commit_authors, $commit_dates,
# $commit_committers, $commit_messages) from the monotone certs, using the
# run_query helper and translating revision ids through @mtn_map.
def fix_certs
  identity = /(.+) <(.+)>/

  #
  # mercurial conversion picked the last cert when there was more than one.
  # Lets manually look for these repeated certs and pick the last one.
  #
  # To do this efficiently we use a trick of sqlite; using 'group by' returns
  # the value of the last row.
  #

  # Revisions carrying more than one "author" cert.
  run_query %{select lower(hex(revision_id)),value from revision_certs where name="author" group by revision_id having count(*) > 1} do |rev, author_key|
    git_id = @mtn_map[rev]
    $author_map[author_key].scan(identity) do |name, email|
      $commit_authors[git_id] = [name, email]
    end
  end

  # Revisions carrying more than one "date" cert.
  run_query %{select lower(hex(revision_id)),value from revision_certs where name="date" group by revision_id having count(*) > 1} do |rev, stamp|
    $commit_dates[@mtn_map[rev]] = DateTime.parse(stamp)
  end

  # Revisions carrying more than one "changelog" cert; the name of the key
  # that signed the selected cert is mapped to the committer.
  run_query %{select lower(hex(cc.revision_id)),hex(cc.value),k.name from revision_certs cc join public_keys k on k.id = cc.keypair_id where cc.name='changelog' group by cc.revision_id having count(*) > 1} do |rev, changelog_hex, key_name|
    git_id = @mtn_map[rev]
    $author_map[key_name].scan(identity) do |name, email|
      $commit_committers[git_id] = [name, email]
    end
    $commit_messages[git_id] = extract_text(changelog_hex)
  end

  #
  # mercurial conversion added comments in a different format, plus it only used
  # the last one. Lets manually fix all the commits with comments.
  #
  run_query %{select lower(hex(revision_id)),hex(value),count(*) as c from revision_certs where name="comment" group by revision_id} do |rev, comment_hex, _certs|
    git_id = @mtn_map[rev]

    # get the last changelog of this revision
    changelog = ''
    run_query %{select hex(value) from revision_certs where name="changelog" and revision_id=X'#{rev}' group by revision_id} do |changelog_hex, _|
      changelog = extract_text(changelog_hex)
    end

    # The message becomes the changelog, a blank line, then the comment.
    $commit_messages[git_id] = changelog + "\n\n" + extract_text(comment_hex)
  end
end
# Collect the cert based overrides first, then load ./fix-commits.
fix_certs
require './fix-commits'

# Work on a fresh bare clone; every later git command picks it up via GIT_DIR.
FileUtils.rm_rf('pidgin-fix.git')
system 'git clone --bare pidgin.git pidgin-fix.git'
ENV['GIT_DIR'] = 'pidgin-fix.git'

# check for lingering heads
@branches = {}
run_query %{select b.branch,lower(hex(b.revision_id)) from branch_leaves b} do |branch, id|
  (@branches[branch] ||= []) << id
end

# Only branches with more than one leaf are inspected. A leaf that no git
# branch contains gets a numbered tmp-<branch>-<n> branch pointing at it.
@branches.each do |branch, leaves|
  next unless leaves.count > 1

  count = 0
  leaves.each do |leaf|
    git_id = @mtn_map[leaf]
    containing = `git branch --contains #{git_id} | wc -l`.chomp.to_i
    next unless containing == 0

    system "git branch tmp-#{branch}-#{count += 1} #{git_id}"
  end
end
# One commit of the cloned repository, parsed from a single record of the
# script's custom `git log` format, with the author / committer / date /
# message overrides from the global tables applied. #export re-creates it
# through `git commit-tree`.
class Commit
  attr_reader :id, :parents, :msg

  # Committer identity that is never written out as a separate committer.
  CONVERSION_COMMITTER = ['Tailor Conversion Tool', 'tailor@pidgin.im'].freeze

  # data - String holding one log record: header lines ("commit", "tree",
  #        "author", "committer", "parents"), an empty line, then the message.
  def initialize(data)
    @parents = []
    msg = nil
    data.each_line do |l|
      if not msg
        case l
        when /^commit (.+)$/
          @id = $1
        when /^tree (.+)$/
          @tree = $1
        when /^author (.+) <(.+)> (.+)$/
          @author = [$1, $2]
          @author_date = DateTime.strptime($3, '%s %z')
        when /^committer (.+) <(.+)> (.+)$/
          @committer = [$1, $2]
          @committer_date = DateTime.strptime($3, '%s %z')
        when /^parents (.+)$/
          @parents = $1.split(" ")
        when /^$/
          # First empty line: everything after it is the message.
          msg = ""
        end
      else
        msg << l
      end
    end

    # Manual overrides win over what git recorded.
    @author = $commit_authors[@id] if $commit_authors[@id]
    @committer = $commit_committers[@id] if $commit_committers[@id]
    @author_date = $commit_dates[@id] if $commit_dates[@id]
    if $commit_svn_authors[@id]
      @committer = @author # mercurial conv did this
      @author = $commit_svn_authors[@id]
    end

    msg = msg.force_encoding('ascii-8bit')
    # Split a trailing run of "Monotone-<Key>: <value>" lines off the message.
    if msg =~ /\A(.*?)\n(Monotone-\S+: .+\n*)+\Z/m
      msg, mtn_msg = $1, $2
      @mtn_id = mtn_msg.match(/^Monotone-Revision: (.+)$/)[1]
      branches = []
      mtn_msg.scan(/^Monotone-Branch: (.+)$/) do |b, _|
        branches << b
      end
      # Prefer the main branch when the revision sits on several branches.
      branch = branches.member?('im.pidgin.pidgin') ? 'im.pidgin.pidgin' : branches.last
      if branch
        branch = $branches_map[branch]
        branch = 'default' if branch == 'master'
      else
        branch = 'default'
      end
      hg_msg = "\n--HG--\n"
      hg_msg += "branch : #{branch}\n"
      if separate_committer?
        hg_msg += "committer : %s <%s>\n" % @committer
      end
      hg_msg += "extra : monotone-revision : #{@mtn_id}\n"
      hg_msg += "extra : monotone-branches : #{branches.join(' ')}\n"
      @hg_msg = hg_msg
      # Non-bang gsub on purpose: gsub! returns nil when nothing is replaced
      # (a trailer consisting of a single line), which left @mtn_msg nil and
      # made #export fail.
      @mtn_msg = mtn_msg.gsub(/\n(Monotone-\S+: .+\n*)+\Z/, '')
    end
    msg = $commit_messages[@id] if $commit_messages[@id]
    @msg = msg
  end

  # Re-creates the commit with `git commit-tree`, using the already rewritten
  # parents from $commit_map_git, and records the new id in $commit_map_git
  # (by original git id) and $commit_map_mtn (by monotone revision id).
  def export()
    ENV['GIT_AUTHOR_NAME'] = @author[0]
    ENV['GIT_AUTHOR_EMAIL'] = @author[1]
    ENV['GIT_AUTHOR_DATE'] = @author_date.strftime('%s %z')
    ENV['GIT_COMMITTER_NAME'] = @committer[0]
    ENV['GIT_COMMITTER_EMAIL'] = @committer[1]
    ENV['GIT_COMMITTER_DATE'] = @committer_date.strftime('%s %z')

    # Work on a copy so neither @msg nor a string shared with
    # $commit_messages is modified by the substitutions below.
    msg = @msg.dup

    # convert mtn ids
    ids = {}
    msg.scan(/\h{40}/).each do |mtn_id|
      ids[mtn_id] = $commit_map_mtn[mtn_id] if $commit_map_mtn[mtn_id]
    end
    ids.each do |mtn_id, new_id|
      msg.gsub!(/#{mtn_id}/, new_id)
    end

    if separate_committer?
      msg += "\n\ncommitter: %s <%s>\n" % @committer
    end
    # The HG block and trailer exist only for messages that carried Monotone
    # lines; concatenating nil here used to raise TypeError.
    msg += "\n" + @hg_msg + "\n" + @mtn_msg if @hg_msg

    new_id = nil
    parents = @parents.map { |e| ['-p', $commit_map_git[e]] }.flatten
    IO.popen(['git', 'commit-tree', @tree] + parents, "w+") do |pipe|
      pipe.write(msg)
      pipe.close_write
      new_id = pipe.read().chomp()
    end

    # Without a Monotone trailer there is no revision id to index by.
    $commit_map_mtn[@mtn_id] = new_id if @mtn_id
    $commit_map_git[@id] = new_id
  end

  private

  # True when the committer differs from the author and is not the
  # conversion tool identity.
  def separate_committer?
    @author != @committer && @committer != CONVERSION_COMMITTER
  end
end
# Commits still to be rewritten, in the order `git log` reports them.
stack = []
# Symbolic names of every ref; they are moved to the rewritten commits at the end.
heads = `git rev-parse --no-flags --revs-only --symbolic-full-name --all`.split
# One record per commit: header lines, an empty line, then the raw body.
# Commit#initialize parses exactly this layout.
format = [
  'commit %H',
  'tree %T',
  'author %an <%ae> %ad',
  'committer %cn <%ce> %cd',
  'parents %P',
  '', '%B' ].join('%n')
args = %w[--reverse --topo-order --parents --simplify-merges --all]
command = %W[git log -z -s --date=raw --format=format:#{format}] + args
# Block form so the pipe is closed once every NUL separated record has been
# read (IO.popen(command).each left it open).
IO.popen(command) do |pipe|
  pipe.each("\0") do |data|
    stack << Commit.new(data.chomp("\0"))
  end
end
count = 0          # commits rewritten so far, for the progress line
total = stack.size
pending = {}       # not referenced anywhere in the visible script
# Prints +str+ (no newline added) to standard output, but only when standard
# output is a terminal. Returns nil.
def show(str)
  return unless $stdout.isatty

  $stdout.print(str)
end
# Tells whether commit +c+ can be exported now.
#
# It can when every parent already has an entry in $commit_map_git, and every
# 40 digit hex id in its message that @mtn_map translates to another commit
# (not +c+ itself) has one as well. Ids unknown to @mtn_map are ignored.
#
# Returns true or false.
def is_ready(c)
  return false unless c.parents.all? { |parent| $commit_map_git[parent] }

  c.msg.scan(/\h{40}/).none? do |mtn_id|
    git_id = @mtn_map[mtn_id]
    git_id && git_id != c.id && !$commit_map_git[git_id]
  end
end
# Rewrite every commit. A commit is exported only once is_ready accepts it
# (parents and the commits its message refers to are already rewritten);
# otherwise the first ready commit is rotated to the front of the queue.
until stack.empty?
  c = stack.shift
  next if $commit_map_git[c.id]

  if not is_ready(c)
    stack.unshift(c)
    next_index = stack.index { |e| is_ready(e) }
    # Without this check a nil index reached rotate! and surfaced as an
    # unrelated looking TypeError.
    if next_index.nil?
      raise "Cannot make progress: none of the #{stack.size} remaining commits is ready (stuck at #{c.id})"
    end
    stack.rotate!(next_index)
    next
  end

  c.export
  show "\rRewrite #{c.id} (#{count += 1}/#{total})"
end

# Point every ref at the rewritten version of the commit it referred to.
heads.each do |e|
  id = `git rev-parse #{e}^0`.chomp
  new_id = $commit_map_git[id]
  # The old id is passed as well, so the ref only moves if it still has it.
  system('git', 'update-ref', e, new_id, id)
  puts "%s %s -> %s" % [e, id, new_id]
end

# cleanup old stuff
system 'git remote rm origin'
system 'git reflog expire --expire=now --all'
system 'git prune --expire=now'
Jump to Line
Something went wrong with that request. Please try again.