Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

process_{tree,blob}: show objects without buffering

Here's a less trivial thing, and slightly more dubious one.

I was looking at that "struct object_array objects", and wondering why we
do that. I have honestly totally forgotten. Why not just call the "show()"
function as we encounter the objects? Rather than add the objects to the
object_array, and then at the very end going through the array and doing a
'show' on all, just do things more incrementally.

Now, there are possible downsides to this:

 - the "buffer using object_array" _can_ in theory result in at least
   better I-cache usage (two tight loops rather than one more spread out
   one). I don't think this is a real issue, but in theory..

 - this _does_ change the order of the objects printed. Instead of doing a
   "process_tree(revs, commit->tree, &objects, NULL, "");" in the loop
   over the commits (which puts all the root trees _first_ in the object
   list), this patch just adds them to the list of pending objects, and
   then we'll traverse them in that order (and thus show each root tree
   object together with the objects we discover under it)

   I _think_ the new ordering actually makes more sense, but the object
   ordering is actually a subtle thing when it comes to packing
   efficiency, so any change in order is going to have implications for
   packing. Good or bad, I dunno.

 - There may be some reason why we did it that odd way with the object
   array, that I have simply forgotten.

Anyway, now that we don't buffer up the objects before showing them,
that may actually result in lower memory usage during that whole
traverse_commit_list() phase.

This is seriously not very deeply tested. It makes sense to me, it seems
to pass all the tests, it looks ok, but...

Does anybody remember why we did that "object_array" thing? It used to be
an "object_list" a long long time ago, but got changed into the array due
to better memory usage patterns (those linked lists of objects are
horrible from a memory allocation standpoint). But I wonder why we didn't
do this back then. Maybe there's a reason for it.

Or maybe there _used_ to be a reason, and no longer is.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information...
commit 8d2dfc49b199c7da6faefd7993630f24bd37fee0 1 parent 2131526
Linus Torvalds torvalds authored gitster committed
16 builtin-pack-objects.c
@@ -1856,13 +1856,17 @@ static void show_commit(struct commit *commit)
1856 1856 commit->object.flags |= OBJECT_ADDED;
1857 1857 }
1858 1858
1859   -static void show_object(struct object_array_entry *p)
  1859 +static void show_object(struct object *obj, const char *name)
1860 1860 {
1861   - add_preferred_base_object(p->name);
1862   - add_object_entry(p->item->sha1, p->item->type, p->name, 0);
1863   - p->item->flags |= OBJECT_ADDED;
1864   - free((char *)p->name);
1865   - p->name = NULL;
  1861 + add_preferred_base_object(name);
  1862 + add_object_entry(obj->sha1, obj->type, name, 0);
  1863 + obj->flags |= OBJECT_ADDED;
  1864 +
  1865 + /*
  1866 + * We will have generated the hash from the name,
  1867 + * but not saved a pointer to it - we can free it
  1868 + */
  1869 + free((char *)name);
1866 1870 }
1867 1871
1868 1872 static void show_edge(struct commit *commit)
20 builtin-rev-list.c
@@ -169,27 +169,27 @@ static void finish_commit(struct commit *commit)
169 169 commit->buffer = NULL;
170 170 }
171 171
172   -static void finish_object(struct object_array_entry *p)
  172 +static void finish_object(struct object *obj, const char *name)
173 173 {
174   - if (p->item->type == OBJ_BLOB && !has_sha1_file(p->item->sha1))
175   - die("missing blob object '%s'", sha1_to_hex(p->item->sha1));
  174 + if (obj->type == OBJ_BLOB && !has_sha1_file(obj->sha1))
  175 + die("missing blob object '%s'", sha1_to_hex(obj->sha1));
176 176 }
177 177
178   -static void show_object(struct object_array_entry *p)
  178 +static void show_object(struct object *obj, const char *name)
179 179 {
180 180 /* An object with name "foo\n0000000..." can be used to
181 181 * confuse downstream "git pack-objects" very badly.
182 182 */
183   - const char *ep = strchr(p->name, '\n');
  183 + const char *ep = strchr(name, '\n');
184 184
185   - finish_object(p);
  185 + finish_object(obj, name);
186 186 if (ep) {
187   - printf("%s %.*s\n", sha1_to_hex(p->item->sha1),
188   - (int) (ep - p->name),
189   - p->name);
  187 + printf("%s %.*s\n", sha1_to_hex(obj->sha1),
  188 + (int) (ep - name),
  189 + name);
190 190 }
191 191 else
192   - printf("%s %s\n", sha1_to_hex(p->item->sha1), p->name);
  192 + printf("%s %s\n", sha1_to_hex(obj->sha1), name);
193 193 }
194 194
195 195 static void show_edge(struct commit *commit)
35 list-objects.c
@@ -10,7 +10,7 @@
10 10
11 11 static void process_blob(struct rev_info *revs,
12 12 struct blob *blob,
13   - struct object_array *p,
  13 + show_object_fn show,
14 14 struct name_path *path,
15 15 const char *name)
16 16 {
@@ -23,7 +23,7 @@ static void process_blob(struct rev_info *revs,
23 23 if (obj->flags & (UNINTERESTING | SEEN))
24 24 return;
25 25 obj->flags |= SEEN;
26   - add_object(obj, p, path, name);
  26 + show(obj, path_name(path, name));
27 27 }
28 28
29 29 /*
@@ -50,7 +50,7 @@ static void process_blob(struct rev_info *revs,
50 50 */
51 51 static void process_gitlink(struct rev_info *revs,
52 52 const unsigned char *sha1,
53   - struct object_array *p,
  53 + show_object_fn show,
54 54 struct name_path *path,
55 55 const char *name)
56 56 {
@@ -59,7 +59,7 @@ static void process_gitlink(struct rev_info *revs,
59 59
60 60 static void process_tree(struct rev_info *revs,
61 61 struct tree *tree,
62   - struct object_array *p,
  62 + show_object_fn show,
63 63 struct name_path *path,
64 64 const char *name)
65 65 {
@@ -77,7 +77,7 @@ static void process_tree(struct rev_info *revs,
77 77 if (parse_tree(tree) < 0)
78 78 die("bad tree object %s", sha1_to_hex(obj->sha1));
79 79 obj->flags |= SEEN;
80   - add_object(obj, p, path, name);
  80 + show(obj, path_name(path, name));
81 81 me.up = path;
82 82 me.elem = name;
83 83 me.elem_len = strlen(name);
@@ -88,14 +88,14 @@ static void process_tree(struct rev_info *revs,
88 88 if (S_ISDIR(entry.mode))
89 89 process_tree(revs,
90 90 lookup_tree(entry.sha1),
91   - p, &me, entry.path);
  91 + show, &me, entry.path);
92 92 else if (S_ISGITLINK(entry.mode))
93 93 process_gitlink(revs, entry.sha1,
94   - p, &me, entry.path);
  94 + show, &me, entry.path);
95 95 else
96 96 process_blob(revs,
97 97 lookup_blob(entry.sha1),
98   - p, &me, entry.path);
  98 + show, &me, entry.path);
99 99 }
100 100 free(tree->buffer);
101 101 tree->buffer = NULL;
@@ -134,16 +134,20 @@ void mark_edges_uninteresting(struct commit_list *list,
134 134 }
135 135 }
136 136
  137 +static void add_pending_tree(struct rev_info *revs, struct tree *tree)
  138 +{
  139 + add_pending_object(revs, &tree->object, "");
  140 +}
  141 +
137 142 void traverse_commit_list(struct rev_info *revs,
138 143 void (*show_commit)(struct commit *),
139   - void (*show_object)(struct object_array_entry *))
  144 + void (*show_object)(struct object *, const char *))
140 145 {
141 146 int i;
142 147 struct commit *commit;
143   - struct object_array objects = { 0, 0, NULL };
144 148
145 149 while ((commit = get_revision(revs)) != NULL) {
146   - process_tree(revs, commit->tree, &objects, NULL, "");
  150 + add_pending_tree(revs, commit->tree);
147 151 show_commit(commit);
148 152 }
149 153 for (i = 0; i < revs->pending.nr; i++) {
@@ -154,25 +158,22 @@ void traverse_commit_list(struct rev_info *revs,
154 158 continue;
155 159 if (obj->type == OBJ_TAG) {
156 160 obj->flags |= SEEN;
157   - add_object_array(obj, name, &objects);
  161 + show_object(obj, name);
158 162 continue;
159 163 }
160 164 if (obj->type == OBJ_TREE) {
161   - process_tree(revs, (struct tree *)obj, &objects,
  165 + process_tree(revs, (struct tree *)obj, show_object,
162 166 NULL, name);
163 167 continue;
164 168 }
165 169 if (obj->type == OBJ_BLOB) {
166   - process_blob(revs, (struct blob *)obj, &objects,
  170 + process_blob(revs, (struct blob *)obj, show_object,
167 171 NULL, name);
168 172 continue;
169 173 }
170 174 die("unknown pending object %s (%s)",
171 175 sha1_to_hex(obj->sha1), name);
172 176 }
173   - for (i = 0; i < objects.nr; i++)
174   - show_object(&objects.objects[i]);
175   - free(objects.objects);
176 177 if (revs->pending.nr) {
177 178 free(revs->pending.objects);
178 179 revs->pending.nr = 0;
2  list-objects.h
@@ -2,7 +2,7 @@
2 2 #define LIST_OBJECTS_H
3 3
4 4 typedef void (*show_commit_fn)(struct commit *);
5   -typedef void (*show_object_fn)(struct object_array_entry *);
  5 +typedef void (*show_object_fn)(struct object *, const char *);
6 6 typedef void (*show_edge_fn)(struct commit *);
7 7
8 8 void traverse_commit_list(struct rev_info *revs, show_commit_fn, show_object_fn);
2  revision.c
@@ -14,7 +14,7 @@
14 14
15 15 volatile show_early_output_fn_t show_early_output;
16 16
17   -static char *path_name(struct name_path *path, const char *name)
  17 +char *path_name(struct name_path *path, const char *name)
18 18 {
19 19 struct name_path *p;
20 20 char *n, *m;
2  revision.h
@@ -141,6 +141,8 @@ struct name_path {
141 141 const char *elem;
142 142 };
143 143
  144 +char *path_name(struct name_path *path, const char *name);
  145 +
144 146 extern void add_object(struct object *obj,
145 147 struct object_array *p,
146 148 struct name_path *path,
12 upload-pack.c
@@ -78,20 +78,20 @@ static void show_commit(struct commit *commit)
78 78 commit->buffer = NULL;
79 79 }
80 80
81   -static void show_object(struct object_array_entry *p)
  81 +static void show_object(struct object *obj, const char *name)
82 82 {
83 83 /* An object with name "foo\n0000000..." can be used to
84 84 * confuse downstream git-pack-objects very badly.
85 85 */
86   - const char *ep = strchr(p->name, '\n');
  86 + const char *ep = strchr(name, '\n');
87 87 if (ep) {
88   - fprintf(pack_pipe, "%s %.*s\n", sha1_to_hex(p->item->sha1),
89   - (int) (ep - p->name),
90   - p->name);
  88 + fprintf(pack_pipe, "%s %.*s\n", sha1_to_hex(obj->sha1),
  89 + (int) (ep - name),
  90 + name);
91 91 }
92 92 else
93 93 fprintf(pack_pipe, "%s %s\n",
94   - sha1_to_hex(p->item->sha1), p->name);
  94 + sha1_to_hex(obj->sha1), name);
95 95 }
96 96
97 97 static void show_edge(struct commit *commit)

0 comments on commit 8d2dfc4

Please sign in to comment.
Something went wrong with that request. Please try again.