Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Merge branch 'jk/cat-file-batch-optim'

If somebody wants to know only the on-disk footprint of an object,
without needing to know its type or payload size, we can bypass a
lot of code and learn it cheaply.

* jk/cat-file-batch-optim:
  Fix some sparse warnings
  sha1_object_info_extended: pass object_info to helpers
  sha1_object_info_extended: make type calculation optional
  packed_object_info: make type lookup optional
  packed_object_info: hoist delta type resolution to helper
  sha1_loose_object_info: make type lookup optional
  sha1_object_info_extended: rename "status" to "type"
  cat-file: disable object/refname ambiguity check for batch mode
  • Loading branch information...
commit 356df9bd8df58eb759fedaee8a8d1a7dc0872f8f 2 parents 2bf3501 + d099b71
Junio C Hamano authored July 24, 2013
16  builtin/cat-file.c
@@ -150,7 +150,9 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len,
150 150
 		if (!data->mark_query)
151 151
 			strbuf_addstr(sb, sha1_to_hex(data->sha1));
152 152
 	} else if (is_atom("objecttype", atom, len)) {
153  
-		if (!data->mark_query)
  153
+		if (data->mark_query)
  154
+			data->info.typep = &data->type;
  155
+		else
154 156
 			strbuf_addstr(sb, typename(data->type));
155 157
 	} else if (is_atom("objectsize", atom, len)) {
156 158
 		if (data->mark_query)
@@ -229,8 +231,7 @@ static int batch_one_object(const char *obj_name, struct batch_options *opt,
229 231
 		return 0;
230 232
 	}
231 233
 
232  
-	data->type = sha1_object_info_extended(data->sha1, &data->info);
233  
-	if (data->type <= 0) {
  234
+	if (sha1_object_info_extended(data->sha1, &data->info) < 0) {
234 235
 		printf("%s missing\n", obj_name);
235 236
 		fflush(stdout);
236 237
 		return 0;
@@ -266,6 +267,15 @@ static int batch_objects(struct batch_options *opt)
266 267
 	strbuf_expand(&buf, opt->format, expand_format, &data);
267 268
 	data.mark_query = 0;
268 269
 
  270
+	/*
  271
+	 * We are going to call get_sha1 on a potentially very large number of
  272
+	 * objects. In most large cases, these will be actual object sha1s. The
  273
+	 * cost to double-check that each one is not also a ref (just so we can
  274
+	 * warn) ends up dwarfing the actual cost of the object lookups
  275
+	 * themselves. We can work around it by just turning off the warning.
  276
+	 */
  277
+	warn_on_object_refname_ambiguity = 0;
  278
+
269 279
 	while (strbuf_getline(&buf, stdin, '\n') != EOF) {
270 280
 		char *p;
271 281
 		int error;
2  cache.h
@@ -577,6 +577,7 @@ extern int assume_unchanged;
577 577
 extern int prefer_symlink_refs;
578 578
 extern int log_all_ref_updates;
579 579
 extern int warn_ambiguous_refs;
  580
+extern int warn_on_object_refname_ambiguity;
580 581
 extern int shared_repository;
581 582
 extern const char *apply_default_whitespace;
582 583
 extern const char *apply_default_ignorewhitespace;
@@ -1131,6 +1132,7 @@ extern int unpack_object_header(struct packed_git *, struct pack_window **, off_
1131 1132
 
1132 1133
 struct object_info {
1133 1134
 	/* Request */
  1135
+	enum object_type *typep;
1134 1136
 	unsigned long *sizep;
1135 1137
 	unsigned long *disk_sizep;
1136 1138
 
1  environment.c
@@ -22,6 +22,7 @@ int prefer_symlink_refs;
22 22
 int is_bare_repository_cfg = -1; /* unspecified */
23 23
 int log_all_ref_updates = -1; /* unspecified */
24 24
 int warn_ambiguous_refs = 1;
  25
+int warn_on_object_refname_ambiguity = 1;
25 26
 int repository_format_version;
26 27
 const char *git_commit_encoding;
27 28
 const char *git_log_output_encoding;
179  sha1_file.c
@@ -1306,6 +1306,26 @@ static int git_open_noatime(const char *name)
1306 1306
 	}
1307 1307
 }
1308 1308
 
  1309
+static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
  1310
+{
  1311
+	char *name = sha1_file_name(sha1);
  1312
+	struct alternate_object_database *alt;
  1313
+
  1314
+	if (!lstat(name, st))
  1315
+		return 0;
  1316
+
  1317
+	prepare_alt_odb();
  1318
+	errno = ENOENT;
  1319
+	for (alt = alt_odb_list; alt; alt = alt->next) {
  1320
+		name = alt->name;
  1321
+		fill_sha1_path(name, sha1);
  1322
+		if (!lstat(alt->base, st))
  1323
+			return 0;
  1324
+	}
  1325
+
  1326
+	return -1;
  1327
+}
  1328
+
1309 1329
 static int open_sha1_file(const unsigned char *sha1)
1310 1330
 {
1311 1331
 	int fd;
@@ -1693,52 +1713,21 @@ static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset)
1693 1713
 	return type;
1694 1714
 }
1695 1715
 
1696  
-
1697 1716
 #define POI_STACK_PREALLOC 64
1698 1717
 
1699  
-static int packed_object_info(struct packed_git *p, off_t obj_offset,
1700  
-			      unsigned long *sizep, int *rtype,
1701  
-			      unsigned long *disk_sizep)
  1718
+static enum object_type packed_to_object_type(struct packed_git *p,
  1719
+					      off_t obj_offset,
  1720
+					      enum object_type type,
  1721
+					      struct pack_window **w_curs,
  1722
+					      off_t curpos)
1702 1723
 {
1703  
-	struct pack_window *w_curs = NULL;
1704  
-	unsigned long size;
1705  
-	off_t curpos = obj_offset;
1706  
-	enum object_type type;
1707 1724
 	off_t small_poi_stack[POI_STACK_PREALLOC];
1708 1725
 	off_t *poi_stack = small_poi_stack;
1709 1726
 	int poi_stack_nr = 0, poi_stack_alloc = POI_STACK_PREALLOC;
1710 1727
 
1711  
-	type = unpack_object_header(p, &w_curs, &curpos, &size);
1712  
-
1713  
-	if (rtype)
1714  
-		*rtype = type; /* representation type */
1715  
-
1716  
-	if (sizep) {
1717  
-		if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1718  
-			off_t tmp_pos = curpos;
1719  
-			off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
1720  
-							   type, obj_offset);
1721  
-			if (!base_offset) {
1722  
-				type = OBJ_BAD;
1723  
-				goto out;
1724  
-			}
1725  
-			*sizep = get_size_from_delta(p, &w_curs, tmp_pos);
1726  
-			if (*sizep == 0) {
1727  
-				type = OBJ_BAD;
1728  
-				goto out;
1729  
-			}
1730  
-		} else {
1731  
-			*sizep = size;
1732  
-		}
1733  
-	}
1734  
-
1735  
-	if (disk_sizep) {
1736  
-		struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
1737  
-		*disk_sizep = revidx[1].offset - obj_offset;
1738  
-	}
1739  
-
1740 1728
 	while (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
1741 1729
 		off_t base_offset;
  1730
+		unsigned long size;
1742 1731
 		/* Push the object we're going to leave behind */
1743 1732
 		if (poi_stack_nr >= poi_stack_alloc && poi_stack == small_poi_stack) {
1744 1733
 			poi_stack_alloc = alloc_nr(poi_stack_nr);
@@ -1749,11 +1738,11 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
1749 1738
 		}
1750 1739
 		poi_stack[poi_stack_nr++] = obj_offset;
1751 1740
 		/* If parsing the base offset fails, just unwind */
1752  
-		base_offset = get_delta_base(p, &w_curs, &curpos, type, obj_offset);
  1741
+		base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset);
1753 1742
 		if (!base_offset)
1754 1743
 			goto unwind;
1755 1744
 		curpos = obj_offset = base_offset;
1756  
-		type = unpack_object_header(p, &w_curs, &curpos, &size);
  1745
+		type = unpack_object_header(p, w_curs, &curpos, &size);
1757 1746
 		if (type <= OBJ_NONE) {
1758 1747
 			/* If getting the base itself fails, we first
1759 1748
 			 * retry the base, otherwise unwind */
@@ -1780,7 +1769,6 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
1780 1769
 out:
1781 1770
 	if (poi_stack != small_poi_stack)
1782 1771
 		free(poi_stack);
1783  
-	unuse_pack(&w_curs);
1784 1772
 	return type;
1785 1773
 
1786 1774
 unwind:
@@ -1794,6 +1782,57 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
1794 1782
 	goto out;
1795 1783
 }
1796 1784
 
  1785
+static int packed_object_info(struct packed_git *p, off_t obj_offset,
  1786
+			      struct object_info *oi)
  1787
+{
  1788
+	struct pack_window *w_curs = NULL;
  1789
+	unsigned long size;
  1790
+	off_t curpos = obj_offset;
  1791
+	enum object_type type;
  1792
+
  1793
+	/*
  1794
+	 * We always get the representation type, but only convert it to
  1795
+	 * a "real" type later if the caller is interested.
  1796
+	 */
  1797
+	type = unpack_object_header(p, &w_curs, &curpos, &size);
  1798
+
  1799
+	if (oi->sizep) {
  1800
+		if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
  1801
+			off_t tmp_pos = curpos;
  1802
+			off_t base_offset = get_delta_base(p, &w_curs, &tmp_pos,
  1803
+							   type, obj_offset);
  1804
+			if (!base_offset) {
  1805
+				type = OBJ_BAD;
  1806
+				goto out;
  1807
+			}
  1808
+			*oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
  1809
+			if (*oi->sizep == 0) {
  1810
+				type = OBJ_BAD;
  1811
+				goto out;
  1812
+			}
  1813
+		} else {
  1814
+			*oi->sizep = size;
  1815
+		}
  1816
+	}
  1817
+
  1818
+	if (oi->disk_sizep) {
  1819
+		struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
  1820
+		*oi->disk_sizep = revidx[1].offset - obj_offset;
  1821
+	}
  1822
+
  1823
+	if (oi->typep) {
  1824
+		*oi->typep = packed_to_object_type(p, obj_offset, type, &w_curs, curpos);
  1825
+		if (*oi->typep < 0) {
  1826
+			type = OBJ_BAD;
  1827
+			goto out;
  1828
+		}
  1829
+	}
  1830
+
  1831
+out:
  1832
+	unuse_pack(&w_curs);
  1833
+	return type;
  1834
+}
  1835
+
1797 1836
 static void *unpack_compressed_entry(struct packed_git *p,
1798 1837
 				    struct pack_window **w_curs,
1799 1838
 				    off_t curpos,
@@ -2363,8 +2402,8 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1,
2363 2402
 
2364 2403
 }
2365 2404
 
2366  
-static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *sizep,
2367  
-				  unsigned long *disk_sizep)
  2405
+static int sha1_loose_object_info(const unsigned char *sha1,
  2406
+				  struct object_info *oi)
2368 2407
 {
2369 2408
 	int status;
2370 2409
 	unsigned long mapsize, size;
@@ -2372,21 +2411,37 @@ static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *size
2372 2411
 	git_zstream stream;
2373 2412
 	char hdr[32];
2374 2413
 
  2414
+	/*
  2415
+	 * If we don't care about type or size, then we don't
  2416
+	 * need to look inside the object at all.
  2417
+	 */
  2418
+	if (!oi->typep && !oi->sizep) {
  2419
+		if (oi->disk_sizep) {
  2420
+			struct stat st;
  2421
+			if (stat_sha1_file(sha1, &st) < 0)
  2422
+				return -1;
  2423
+			*oi->disk_sizep = st.st_size;
  2424
+		}
  2425
+		return 0;
  2426
+	}
  2427
+
2375 2428
 	map = map_sha1_file(sha1, &mapsize);
2376 2429
 	if (!map)
2377 2430
 		return -1;
2378  
-	if (disk_sizep)
2379  
-		*disk_sizep = mapsize;
  2431
+	if (oi->disk_sizep)
  2432
+		*oi->disk_sizep = mapsize;
2380 2433
 	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
2381 2434
 		status = error("unable to unpack %s header",
2382 2435
 			       sha1_to_hex(sha1));
2383 2436
 	else if ((status = parse_sha1_header(hdr, &size)) < 0)
2384 2437
 		status = error("unable to parse %s header", sha1_to_hex(sha1));
2385  
-	else if (sizep)
2386  
-		*sizep = size;
  2438
+	else if (oi->sizep)
  2439
+		*oi->sizep = size;
2387 2440
 	git_inflate_end(&stream);
2388 2441
 	munmap(map, mapsize);
2389  
-	return status;
  2442
+	if (oi->typep)
  2443
+		*oi->typep = status;
  2444
+	return 0;
2390 2445
 }
2391 2446
 
2392 2447
 /* returns enum object_type or negative */
@@ -2394,37 +2449,37 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
2394 2449
 {
2395 2450
 	struct cached_object *co;
2396 2451
 	struct pack_entry e;
2397  
-	int status, rtype;
  2452
+	int rtype;
2398 2453
 
2399 2454
 	co = find_cached_object(sha1);
2400 2455
 	if (co) {
  2456
+		if (oi->typep)
  2457
+			*(oi->typep) = co->type;
2401 2458
 		if (oi->sizep)
2402 2459
 			*(oi->sizep) = co->size;
2403 2460
 		if (oi->disk_sizep)
2404 2461
 			*(oi->disk_sizep) = 0;
2405 2462
 		oi->whence = OI_CACHED;
2406  
-		return co->type;
  2463
+		return 0;
2407 2464
 	}
2408 2465
 
2409 2466
 	if (!find_pack_entry(sha1, &e)) {
2410 2467
 		/* Most likely it's a loose object. */
2411  
-		status = sha1_loose_object_info(sha1, oi->sizep, oi->disk_sizep);
2412  
-		if (status >= 0) {
  2468
+		if (!sha1_loose_object_info(sha1, oi)) {
2413 2469
 			oi->whence = OI_LOOSE;
2414  
-			return status;
  2470
+			return 0;
2415 2471
 		}
2416 2472
 
2417 2473
 		/* Not a loose object; someone else may have just packed it. */
2418 2474
 		reprepare_packed_git();
2419 2475
 		if (!find_pack_entry(sha1, &e))
2420  
-			return status;
  2476
+			return -1;
2421 2477
 	}
2422 2478
 
2423  
-	status = packed_object_info(e.p, e.offset, oi->sizep, &rtype,
2424  
-				    oi->disk_sizep);
2425  
-	if (status < 0) {
  2479
+	rtype = packed_object_info(e.p, e.offset, oi);
  2480
+	if (rtype < 0) {
2426 2481
 		mark_bad_packed_object(e.p, sha1);
2427  
-		status = sha1_object_info_extended(sha1, oi);
  2482
+		return sha1_object_info_extended(sha1, oi);
2428 2483
 	} else if (in_delta_base_cache(e.p, e.offset)) {
2429 2484
 		oi->whence = OI_DBCACHED;
2430 2485
 	} else {
@@ -2435,15 +2490,19 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
2435 2490
 					 rtype == OBJ_OFS_DELTA);
2436 2491
 	}
2437 2492
 
2438  
-	return status;
  2493
+	return 0;
2439 2494
 }
2440 2495
 
2441 2496
 int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
2442 2497
 {
2443  
-	struct object_info oi = {0};
  2498
+	enum object_type type;
  2499
+	struct object_info oi = {NULL};
2444 2500
 
  2501
+	oi.typep = &type;
2445 2502
 	oi.sizep = sizep;
2446  
-	return sha1_object_info_extended(sha1, &oi);
  2503
+	if (sha1_object_info_extended(sha1, &oi) < 0)
  2504
+		return -1;
  2505
+	return type;
2447 2506
 }
2448 2507
 
2449 2508
 static void *read_packed_sha1(const unsigned char *sha1,
14  sha1_name.c
@@ -452,13 +452,15 @@ static int get_sha1_basic(const char *str, int len, unsigned char *sha1)
452 452
 	int at, reflog_len, nth_prior = 0;
453 453
 
454 454
 	if (len == 40 && !get_sha1_hex(str, sha1)) {
455  
-		refs_found = dwim_ref(str, len, tmp_sha1, &real_ref);
456  
-		if (refs_found > 0 && warn_ambiguous_refs) {
457  
-			warning(warn_msg, len, str);
458  
-			if (advice_object_name_warning)
459  
-				fprintf(stderr, "%s\n", _(object_name_msg));
  455
+		if (warn_on_object_refname_ambiguity) {
  456
+			refs_found = dwim_ref(str, len, tmp_sha1, &real_ref);
  457
+			if (refs_found > 0 && warn_ambiguous_refs) {
  458
+				warning(warn_msg, len, str);
  459
+				if (advice_object_name_warning)
  460
+					fprintf(stderr, "%s\n", _(object_name_msg));
  461
+			}
  462
+			free(real_ref);
460 463
 		}
461  
-		free(real_ref);
462 464
 		return 0;
463 465
 	}
464 466
 
4  streaming.c
@@ -111,11 +111,11 @@ static enum input_source istream_source(const unsigned char *sha1,
111 111
 	unsigned long size;
112 112
 	int status;
113 113
 
  114
+	oi->typep = type;
114 115
 	oi->sizep = &size;
115 116
 	status = sha1_object_info_extended(sha1, oi);
116 117
 	if (status < 0)
117 118
 		return stream_error;
118  
-	*type = status;
119 119
 
120 120
 	switch (oi->whence) {
121 121
 	case OI_LOOSE:
@@ -135,7 +135,7 @@ struct git_istream *open_istream(const unsigned char *sha1,
135 135
 				 struct stream_filter *filter)
136 136
 {
137 137
 	struct git_istream *st;
138  
-	struct object_info oi = {0};
  138
+	struct object_info oi = {NULL};
139 139
 	const unsigned char *real = lookup_replace_object(sha1);
140 140
 	enum input_source src = istream_source(real, type, &oi);
141 141
 

0 notes on commit 356df9b

Please sign in to comment.
Something went wrong with that request. Please try again.