Permalink
Browse files

Merge branch 'jk/cat-file-batch-optim'

If somebody only wants to know the on-disk footprint of an object,
without needing to know its type or payload size, we can bypass a
lot of code and learn it cheaply.

* jk/cat-file-batch-optim:
  Fix some sparse warnings
  sha1_object_info_extended: pass object_info to helpers
  sha1_object_info_extended: make type calculation optional
  packed_object_info: make type lookup optional
  packed_object_info: hoist delta type resolution to helper
  sha1_loose_object_info: make type lookup optional
  sha1_object_info_extended: rename "status" to "type"
  cat-file: disable object/refname ambiguity check for batch mode
  • Loading branch information...
gitster committed Jul 25, 2013
2 parents 2bf3501 + d099b71 commit 356df9bd8df58eb759fedaee8a8d1a7dc0872f8f
Showing with 145 additions and 71 deletions.
  1. +13 −3 builtin/cat-file.c
  2. +2 −0 cache.h
  3. +1 −0 environment.c
  4. +119 −60 sha1_file.c
  5. +8 −6 sha1_name.c
  6. +2 −2 streaming.c
View
@@ -150,7 +150,9 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len,
if (!data->mark_query)
strbuf_addstr(sb, sha1_to_hex(data->sha1));
} else if (is_atom("objecttype", atom, len)) {
- if (!data->mark_query)
+ if (data->mark_query)
+ data->info.typep = &data->type;
+ else
strbuf_addstr(sb, typename(data->type));
} else if (is_atom("objectsize", atom, len)) {
if (data->mark_query)
@@ -229,8 +231,7 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt,
return 0;
}
- data->type = sha1_object_info_extended(data->sha1, &data->info);
- if (data->type <= 0) {
+ if (sha1_object_info_extended(data->sha1, &data->info) < 0) {
printf("%s missing\n", obj_name);
fflush(stdout);
return 0;
@@ -266,6 +267,15 @@ static int batch_objects(struct batch_options *opt)
strbuf_expand(&buf, opt->format, expand_format, &data);
data.mark_query = 0;
+ /*
+ * We are going to call get_sha1 on a potentially very large number of
+ * objects. In most large cases, these will be actual object sha1s. The
+ * cost to double-check that each one is not also a ref (just so we can
+ * warn) ends up dwarfing the actual cost of the object lookups
+ * themselves. We can work around it by just turning off the warning.
+ */
+ warn_on_object_refname_ambiguity = 0;
+
while (strbuf_getline(&buf, stdin, '\n') != EOF) {
char *p;
int error;
View
@@ -577,6 +577,7 @@ extern int assume_unchanged;
extern int prefer_symlink_refs;
extern int log_all_ref_updates;
extern int warn_ambiguous_refs;
+extern int warn_on_object_refname_ambiguity;
extern int shared_repository;
extern const char *apply_default_whitespace;
extern const char *apply_default_ignorewhitespace;
@@ -1131,6 +1132,7 @@ extern int unpack_object_header(struct packed_git *, struct pack_window **, off_
struct object_info {
/* Request */
+ enum object_type *typep;
unsigned long *sizep;
unsigned long *disk_sizep;
View
@@ -22,6 +22,7 @@ int prefer_symlink_refs;
int is_bare_repository_cfg = -1; /* unspecified */
int log_all_ref_updates = -1; /* unspecified */
int warn_ambiguous_refs = 1;
+int warn_on_object_refname_ambiguity = 1;
int repository_format_version;
const char *git_commit_encoding;
const char *git_log_output_encoding;
View
@@ -1306,6 +1306,26 @@ static int git_open_noatime(const char *name)
}
}
+static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
+{
+ char *name = sha1_file_name(sha1);
+ struct alternate_object_database *alt;
+
+ if (!lstat(name, st))
+ return 0;
+
+ prepare_alt_odb();
+ errno = ENOENT;
+ for (alt = alt_odb_list; alt; alt = alt->next) {
+ name = alt->name;
+ fill_sha1_path(name, sha1);
+ if (!lstat(alt->base, st))
+ return 0;
+ }
+
+ return -1;
+}
+
static int open_sha1_file(const unsigned char *sha1)
{
int fd;
@@ -1693,52 +1713,21 @@ static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
return type;
}
-
#define POI_STACK_PREALLOC 64
-static int packed_object_info(struct packed_git *p, off_t obj_offset,
- unsigned long *sizep, int *rtype,
- unsigned long *disk_sizep)
+static enum object_type packed_to_object_type(struct packed_git *p,
+ off_t obj_offset,
+ enum object_type type,
+ struct pack_window **w_curs,
+ off_t curpos)
{
- struct pack_window *w_curs = NULL;
- unsigned long size;
- off_t curpos = obj_offset;
- enum object_type type;
off_t small_poi_stack[POI_STACK_PREALLOC];
off_t *poi_stack = small_poi_stack;
int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
- type = unpack_object_header(p, &w_curs, &curpos, &size);
-
- if (rtype)
- *rtype = type; /* representation type */
-
- if (sizep) {
- if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
- off_t tmp_pos = curpos;
- off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
- type, obj_offset);
- if (!base_offset) {
- type = OBJ_BAD;
- goto out;
- }
- *sizep = get_size_from_delta(p, &w_curs, tmp_pos);
- if (*sizep == 0) {
- type = OBJ_BAD;
- goto out;
- }
- } else {
- *sizep = size;
- }
- }
-
- if (disk_sizep) {
- struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
- *disk_sizep = revidx[1].offset - obj_offset;
- }
-
while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
off_t base_offset;
+ unsigned long size;
/* Push the object we're going to leave behind */
if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
poi_stack_alloc = alloc_nr(poi_stack_nr);
@@ -1749,11 +1738,11 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
}
poi_stack[poi_stack_nr++] = obj_offset;
/* If parsing the base offset fails, just unwind */
- base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
+ base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
if (!base_offset)
goto unwind;
curpos = obj_offset = base_offset;
- type = unpack_object_header(p, &w_curs, &curpos, &size);
+ type = unpack_object_header(p, w_curs, &curpos, &size);
if (type <= OBJ_NONE) {
/* If getting the base itself fails, we first
* retry the base, otherwise unwind */
@@ -1780,7 +1769,6 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
out:
if (poi_stack != small_poi_stack)
free(poi_stack);
- unuse_pack(&w_curs);
return type;
unwind:
@@ -1794,6 +1782,57 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
goto out;
}
+static int packed_object_info(struct packed_git *p, off_t obj_offset,
+ struct object_info *oi)
+{
+ struct pack_window *w_curs = NULL;
+ unsigned long size;
+ off_t curpos = obj_offset;
+ enum object_type type;
+
+ /*
+ * We always get the representation type, but only convert it to
+ * a "real" type later if the caller is interested.
+ */
+ type = unpack_object_header(p, &w_curs, &curpos, &size);
+
+ if (oi->sizep) {
+ if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
+ off_t tmp_pos = curpos;
+ off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
+ type, obj_offset);
+ if (!base_offset) {
+ type = OBJ_BAD;
+ goto out;
+ }
+ *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
+ if (*oi->sizep == 0) {
+ type = OBJ_BAD;
+ goto out;
+ }
+ } else {
+ *oi->sizep = size;
+ }
+ }
+
+ if (oi->disk_sizep) {
+ struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
+ *oi->disk_sizep = revidx[1].offset - obj_offset;
+ }
+
+ if (oi->typep) {
+ *oi->typep = packed_to_object_type(p, obj_offset, type, &w_curs, curpos);
+ if (*oi->typep < 0) {
+ type = OBJ_BAD;
+ goto out;
+ }
+ }
+
+out:
+ unuse_pack(&w_curs);
+ return type;
+}
+
static void *unpack_compressed_entry(struct packed_git *p,
struct pack_window **w_curs,
off_t curpos,
@@ -2363,68 +2402,84 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
}
-static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep,
- unsigned long *disk_sizep)
+static int sha1_loose_object_info(const unsigned char *sha1,
+ struct object_info *oi)
{
int status;
unsigned long mapsize, size;
void *map;
git_zstream stream;
char hdr[32];
+ /*
+ * If we don't care about type or size, then we don't
+ * need to look inside the object at all.
+ */
+ if (!oi->typep && !oi->sizep) {
+ if (oi->disk_sizep) {
+ struct stat st;
+ if (stat_sha1_file(sha1, &st) < 0)
+ return -1;
+ *oi->disk_sizep = st.st_size;
+ }
+ return 0;
+ }
+
map = map_sha1_file(sha1, &mapsize);
if (!map)
return -1;
- if (disk_sizep)
- *disk_sizep = mapsize;
+ if (oi->disk_sizep)
+ *oi->disk_sizep = mapsize;
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
status = error("unable to unpack %s header",
sha1_to_hex(sha1));
else if ((status = parse_sha1_header(hdr, &size)) < 0)
status = error("unable to parse %s header", sha1_to_hex(sha1));
- else if (sizep)
- *sizep = size;
+ else if (oi->sizep)
+ *oi->sizep = size;
git_inflate_end(&stream);
munmap(map, mapsize);
- return status;
+ if (oi->typep)
+ *oi->typep = status;
+ return 0;
}
/* returns enum object_type or negative */
int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
{
struct cached_object *co;
struct pack_entry e;
- int status, rtype;
+ int rtype;
co = find_cached_object(sha1);
if (co) {
+ if (oi->typep)
+ *(oi->typep) = co->type;
if (oi->sizep)
*(oi->sizep) = co->size;
if (oi->disk_sizep)
*(oi->disk_sizep) = 0;
oi->whence = OI_CACHED;
- return co->type;
+ return 0;
}
if (!find_pack_entry(sha1, &e)) {
/* Most likely it's a loose object. */
- status = sha1_loose_object_info(sha1, oi->sizep, oi->disk_sizep);
- if (status >= 0) {
+ if (!sha1_loose_object_info(sha1, oi)) {
oi->whence = OI_LOOSE;
- return status;
+ return 0;
}
/* Not a loose object; someone else may have just packed it. */
reprepare_packed_git();
if (!find_pack_entry(sha1, &e))
- return status;
+ return -1;
}
- status = packed_object_info(e.p, e.offset, oi->sizep, &rtype,
- oi->disk_sizep);
- if (status < 0) {
+ rtype = packed_object_info(e.p, e.offset, oi);
+ if (rtype < 0) {
mark_bad_packed_object(e.p, sha1);
- status = sha1_object_info_extended(sha1, oi);
+ return sha1_object_info_extended(sha1, oi);
} else if (in_delta_base_cache(e.p, e.offset)) {
oi->whence = OI_DBCACHED;
} else {
@@ -2435,15 +2490,19 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
rtype == OBJ_OFS_DELTA);
}
- return status;
+ return 0;
}
int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
{
- struct object_info oi = {0};
+ enum object_type type;
+ struct object_info oi = {NULL};
+ oi.typep = &type;
oi.sizep = sizep;
- return sha1_object_info_extended(sha1, &oi);
+ if (sha1_object_info_extended(sha1, &oi) < 0)
+ return -1;
+ return type;
}
static void *read_packed_sha1(const unsigned char *sha1,
View
@@ -452,13 +452,15 @@ static int get_sha1_basic(const char *str, int len, unsigned char *sha1)
int at, reflog_len, nth_prior = 0;
if (len == 40 && !get_sha1_hex(str, sha1)) {
- refs_found = dwim_ref(str, len, tmp_sha1, &real_ref);
- if (refs_found > 0 && warn_ambiguous_refs) {
- warning(warn_msg, len, str);
- if (advice_object_name_warning)
- fprintf(stderr, "%s\n", _(object_name_msg));
+ if (warn_on_object_refname_ambiguity) {
+ refs_found = dwim_ref(str, len, tmp_sha1, &real_ref);
+ if (refs_found > 0 && warn_ambiguous_refs) {
+ warning(warn_msg, len, str);
+ if (advice_object_name_warning)
+ fprintf(stderr, "%s\n", _(object_name_msg));
+ }
+ free(real_ref);
}
- free(real_ref);
return 0;
}
View
@@ -111,11 +111,11 @@ static enum input_source istream_source(const unsigned char *sha1,
unsigned long size;
int status;
+ oi->typep = type;
oi->sizep = &size;
status = sha1_object_info_extended(sha1, oi);
if (status < 0)
return stream_error;
- *type = status;
switch (oi->whence) {
case OI_LOOSE:
@@ -135,7 +135,7 @@ struct git_istream *open_istream(const unsigned char *sha1,
struct stream_filter *filter)
{
struct git_istream *st;
- struct object_info oi = {0};
+ struct object_info oi = {NULL};
const unsigned char *real = lookup_replace_object(sha1);
enum input_source src = istream_source(real, type, &oi);

0 comments on commit 356df9b

Please sign in to comment.