Skip to content

Commit e0b86cc

Browse files
author
epriestley
committed
Add a Mercurial commit discovery daemon
Summary: Repository import has three major steps: - Commit discovery (serial) - Message parsing (parallel, mostly VCS independent) - Change parsing (parallel, highly VCS dependent) This implements commit discovery for Mercurial, similar to git's parsing: - List the heads of all the branches. - If we haven't already discovered them, follow them back to their roots (or the first commit we have discovered). - Import all the newly discovered commits, oldest first. This is a little complicated but it ensures we discover commits in depth order, so the discovery process is robust against interruption/failure. If we just inserted commits as we went, we might read the tip, insert it, and then crash. When we ran again, we'd think we had already discovered commits older than HEAD. This also allows later stages to rely on being able to find Phabricator commit IDs which correspond to parent commits. NOTE: This importer is fairly slow because "hg" has a large startup time (compare "hg --version" to "git --version" and "svn --version"; on my machine, hg has 60ms of overhead for any command) and we need to run many commands (see the whole "hg id" mess). You can expect something like 10,000 commits per hour, which means you may need to run overnight to discover a large repository (IIRC, the svn/git discovery processes are both about an order of magnitude faster). We could improve this with batching, but I want to keep it as simple as possible for now. Test Plan: Discovered all the commits in the main Mercurial repository, http://selenic.com/repo/hg. Reviewers: Makinde, jungejason, nh, tuomaspelkonen, aran Reviewed By: Makinde CC: aran, Makinde Differential Revision: 943
1 parent 209179a commit e0b86cc

File tree

4 files changed

+172
-2
lines changed

4 files changed

+172
-2
lines changed

scripts/daemon/phabricator_daemon_launcher.php

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ function must_have_extension($ext) {
8585
$phid = $repository->getPHID();
8686

8787
switch ($repository->getVersionControlSystem()) {
88-
case 'git':
88+
case PhabricatorRepositoryType::REPOSITORY_TYPE_GIT:
8989
echo "Launching 'git fetch' daemon on the {$desc} repository...\n";
9090
$control->launchDaemon(
9191
'PhabricatorRepositoryGitFetchDaemon',
@@ -99,14 +99,29 @@ function must_have_extension($ext) {
9999
$phid,
100100
));
101101
break;
102-
case 'svn':
102+
case PhabricatorRepositoryType::REPOSITORY_TYPE_SVN:
103103
echo "Launching discovery daemon on the {$desc} repository...\n";
104104
$control->launchDaemon(
105105
'PhabricatorRepositorySvnCommitDiscoveryDaemon',
106106
array(
107107
$phid,
108108
));
109109
break;
110+
case PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL:
111+
echo "Launching 'hg pull' daemon on the {$desc} repository...\n";
112+
$control->launchDaemon(
113+
'PhabricatorRepositoryMercurialPullDaemon',
114+
array(
115+
$phid,
116+
));
117+
echo "Launching discovery daemon on the {$desc} repository...\n";
118+
$control->launchDaemon(
119+
'PhabricatorRepositoryMercurialCommitDiscoveryDaemon',
120+
array(
121+
$phid,
122+
));
123+
break;
124+
110125
}
111126
}
112127

src/__phutil_library_map__.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@
578578
'PhabricatorRepositoryGitHubNotification' => 'applications/repository/storage/githubnotification',
579579
'PhabricatorRepositoryGitHubPostReceiveController' => 'applications/repository/controller/github-post-receive',
580580
'PhabricatorRepositoryListController' => 'applications/repository/controller/list',
581+
'PhabricatorRepositoryMercurialCommitDiscoveryDaemon' => 'applications/repository/daemon/commitdiscovery/mercurial',
581582
'PhabricatorRepositoryMercurialPullDaemon' => 'applications/repository/daemon/mercurialpull',
582583
'PhabricatorRepositoryPullLocalDaemon' => 'applications/repository/daemon/pulllocal',
583584
'PhabricatorRepositoryShortcut' => 'applications/repository/storage/shortcut',
@@ -1178,6 +1179,7 @@
11781179
'PhabricatorRepositoryGitHubNotification' => 'PhabricatorRepositoryDAO',
11791180
'PhabricatorRepositoryGitHubPostReceiveController' => 'PhabricatorRepositoryController',
11801181
'PhabricatorRepositoryListController' => 'PhabricatorRepositoryController',
1182+
'PhabricatorRepositoryMercurialCommitDiscoveryDaemon' => 'PhabricatorRepositoryCommitDiscoveryDaemon',
11811183
'PhabricatorRepositoryMercurialPullDaemon' => 'PhabricatorRepositoryPullLocalDaemon',
11821184
'PhabricatorRepositoryPullLocalDaemon' => 'PhabricatorRepositoryDaemon',
11831185
'PhabricatorRepositoryShortcut' => 'PhabricatorRepositoryDAO',
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
<?php
2+
3+
/*
4+
* Copyright 2011 Facebook, Inc.
5+
*
6+
* Licensed under the Apache License, Version 2.0 (the "License");
7+
* you may not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
/**
 * Commit discovery for Mercurial repositories.
 *
 * Lists the branch heads of the local working copy, walks backward from every
 * head which is not yet known, and records the newly found commits so that a
 * commit is always recorded after all of its newly found ancestors.
 */
class PhabricatorRepositoryMercurialCommitDiscoveryDaemon
  extends PhabricatorRepositoryCommitDiscoveryDaemon {

  /**
   * Run one discovery pass over all branch heads.
   *
   * @return bool True if at least one branch head was not already known and
   *              was handed to discoverCommit().
   * @throws Exception If the repository is not a Mercurial repository.
   */
  protected function discoverCommits() {
    $repository = $this->getRepository();

    $vcs = $repository->getVersionControlSystem();
    if ($vcs != PhabricatorRepositoryType::REPOSITORY_TYPE_MERCURIAL) {
      throw new Exception("Repository is not a Mercurial repository.");
    }

    list($stdout) = $repository->execxLocalCommand('branches');
    $branches = ArcanistMercurialParser::parseMercurialBranches($stdout);

    $got_something = false;
    foreach ($branches as $name => $branch) {
      // "hg branches" reports short hashes; normalize before any lookup.
      $commit = $this->getFullHash($branch['rev']);
      if ($this->isKnownCommit($commit)) {
        continue;
      }

      $this->discoverCommit($commit);
      $got_something = true;
    }

    return $got_something;
  }

  /**
   * Convert a (possibly abbreviated) commit identifier into a full hash.
   *
   * @param  string Commit identifier accepted by "hg id --rev".
   * @return string Trimmed output of "hg id --debug -i --rev <commit>".
   */
  private function getFullHash($commit) {

    // NOTE: Mercurial shortens hashes to 12 characters by default. This
    // implies collisions with as few as a few million commits. The
    // documentation sensibly advises "Do not use short-form IDs for
    // long-lived representations". It then continues "You can use the
    // --debug option to display the full changeset ID". What?! Yes, this
    // is in fact the only way to turn on full hashes, and the hg source
    // code is littered with "hexfn = ui.debugflag and hex or short" and
    // similar. There is no more-selective flag or config option.
    //
    // Unfortunately, "hg --debug" turns on tons of other extra output,
    // including full commit messages in "hg log" and "hg parents" (which
    // ignore --style); this renders them unparseable. So we have to use
    // "hg id" to convert short hashes into full hashes. See:
    //
    // <http://mercurial.selenic.com/wiki/ChangeSetID>
    //
    // Of course, this means that if there are collisions we will break here
    // (the short commit identifier won't be unambiguous) but maybe Mercurial
    // will have a --full-hashes flag or something by then and we can fix it
    // properly. Until we run into that, this allows us to store data in the
    // right format so when we eventually encounter this we won't have to
    // reparse every Mercurial repository.

    $repository = $this->getRepository();
    list($stdout) = $repository->execxLocalCommand(
      'id --debug -i --rev %s',
      $commit);
    return trim($stdout);
  }

  /**
   * Discover and record a new branch head together with all of its ancestors
   * which are not yet known.
   *
   * Commits are recorded parents-first. Simply reversing the order in which
   * the backward walk encounters commits is NOT sufficient when history
   * contains merges: an ancestor reached through the second parent of a merge
   * may be encountered before a descendant of it which is only reachable
   * through the first parent. The walk therefore remembers the parent edges
   * between new commits and the recording order is derived from them.
   *
   * @param  string Full hash of a commit for which isKnownCommit() is false.
   * @return void
   */
  private function discoverCommit($commit) {
    $repository = $this->getRepository();

    // Full hash => whether the commit is new (true) or already known (false).
    // Doubles as the "already looked at" set so that each commit is checked
    // and walked at most once.
    $is_new = array($commit => true);

    // New commit => list of its parents which are also new.
    $new_parents = array();

    // Walk backward from the head until we find only commits we've already
    // seen.
    $discover = array($commit);
    while ($discover) {
      $target = array_pop($discover);
      $new_parents[$target] = array();

      list($stdout) = $repository->execxLocalCommand(
        'parents --style default --rev %s',
        $target);
      $parents = ArcanistMercurialParser::parseMercurialLog($stdout);
      if ($parents) {
        foreach ($parents as $parent) {
          $parent_commit = $this->getFullHash($parent['rev']);
          if (!isset($is_new[$parent_commit])) {
            $is_new[$parent_commit] = !$this->isKnownCommit($parent_commit);
            if ($is_new[$parent_commit]) {
              $discover[] = $parent_commit;
            }
          }
          if ($is_new[$parent_commit]) {
            $new_parents[$target][] = $parent_commit;
          }
        }
      }

      if ($discover) {
        // Inherited helper; presumably signals that the daemon is still
        // making progress during a long walk -- confirm in the base class.
        $this->stillWorking();
      }
    }

    // Order the new commits so that every commit comes after all of its new
    // parents (iterative post-order traversal; history is acyclic, so this
    // terminates).
    $ordered = array();
    $emitted = array();
    $stack = array($commit);
    while ($stack) {
      $target = end($stack);
      if (isset($emitted[$target])) {
        // Reached again through another child; nothing left to do.
        array_pop($stack);
        continue;
      }

      $blocked = false;
      foreach ($new_parents[$target] as $parent_commit) {
        if (!isset($emitted[$parent_commit])) {
          $stack[] = $parent_commit;
          $blocked = true;
        }
      }

      if (!$blocked) {
        array_pop($stack);
        $emitted[$target] = true;
        $ordered[] = $target;
      }
    }

    // Record the commits, oldest first.
    foreach ($ordered as $target) {
      list($stdout) = $repository->execxLocalCommand(
        'log --rev %s --template %s',
        $target,
        '{date|rfc822date}');
      $epoch = strtotime($stdout);

      $this->recordCommit($target, $epoch);
    }
  }

}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
<?php
2+
/**
3+
* This file is automatically generated. Lint this module to rebuild it.
4+
* @generated
5+
*/
6+
7+
8+
9+
phutil_require_module('arcanist', 'repository/parser/mercurial');
10+
11+
phutil_require_module('phabricator', 'applications/repository/constants/repositorytype');
12+
phutil_require_module('phabricator', 'applications/repository/daemon/commitdiscovery/base');
13+
14+
15+
phutil_require_source('PhabricatorRepositoryMercurialCommitDiscoveryDaemon.php');

0 commit comments

Comments
 (0)