Skip to content

Commit

Permalink
dir_iterator: new API for iterating over a directory tree
Browse files Browse the repository at this point in the history
The iterator interface is modeled on that for references, though no
vtable is necessary because there is (so far?) only one type of
dir_iterator.

There are obviously a lot of features that could easily be added to this
class:

* Skip/include directory paths in the iteration
* Shallow/deep iteration
* Letting the caller decide which subdirectories to recurse into (e.g.,
  via a dir_iterator_advance_into() function)
* Option to iterate in sorted order
* Option to iterate over directory paths before vs. after their contents

But these are not needed for the current patch series, so I refrain.

Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
mhagger authored and gitster committed Jun 20, 2016
1 parent d24b21e commit 0fe5043
Show file tree
Hide file tree
Showing 3 changed files with 290 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Expand Up @@ -722,6 +722,7 @@ LIB_OBJS += diff-lib.o
LIB_OBJS += diff-no-index.o
LIB_OBJS += diff.o
LIB_OBJS += dir.o
LIB_OBJS += dir-iterator.o
LIB_OBJS += editor.o
LIB_OBJS += entry.o
LIB_OBJS += environment.o
Expand Down
202 changes: 202 additions & 0 deletions dir-iterator.c
@@ -0,0 +1,202 @@
#include "cache.h"
#include "dir.h"
#include "iterator.h"
#include "dir-iterator.h"

/*
 * State for one directory on the iteration stack. One of these exists
 * for the starting directory and for each subdirectory that is
 * currently being descended into.
 */
struct dir_iterator_level {
/*
 * Zero until dir_iterator_advance() has processed this level for the
 * first time (appended the trailing '/', recorded prefix_len and
 * attempted opendir()). Until then, dir and prefix_len have not been
 * set for this level.
 */
int initialized;

/*
 * The stream returned by opendir() for this level. NULL if opendir()
 * failed; reset to NULL by dir_iterator_advance() when readdir()
 * reports the end of the directory or an error.
 */
DIR *dir;

/*
 * The length of the directory part of path at this level
 * (including a trailing '/'). The shared path buffer is truncated
 * back to this length before each entry name is appended.
 */
size_t prefix_len;

/*
 * The last action that has been taken with the current entry
 * (needed for directories, which have to be included in the
 * iteration and also iterated into):
 *
 * DIR_STATE_ITER:    the entry has been handed back to the caller.
 * DIR_STATE_RECURSE: a new level has been pushed for the entry
 *                    (only done when the entry is a directory).
 */
enum {
DIR_STATE_ITER,
DIR_STATE_RECURSE
} dir_state;
};

/*
 * The full data structure used to manage the internal directory
 * iteration state. It includes members that are not part of the
 * public interface.
 *
 * The public struct dir_iterator handed to callers is the "base"
 * member; the functions in this file cast that pointer back to
 * struct dir_iterator_int, which is why base must stay the first
 * member.
 */
struct dir_iterator_int {
/* The public part of the iterator; must be the first member. */
struct dir_iterator base;

/*
 * The number of levels currently on the stack. This is always
 * at least 1, because when it becomes zero the iteration is
 * ended and this struct is freed.
 */
size_t levels_nr;

/* The number of levels that have been allocated on the stack */
size_t levels_alloc;

/*
 * A stack of levels. levels[0] is the uppermost directory
 * that will be included in this iteration. The array is grown
 * with ALLOC_GROW(), so pointers into it must be re-fetched
 * after a level is pushed.
 */
struct dir_iterator_level *levels;
};

/*
 * Advance the iteration to the next path below the starting directory.
 *
 * On success the public members of dir_iterator (path, relative_path,
 * basename, st) describe the new entry and ITER_OK is returned. When
 * no entries are left, the iterator is freed through
 * dir_iterator_abort() and that function's return value is returned;
 * the iterator must not be used afterwards.
 *
 * Directories are visited twice internally: first they are returned to
 * the caller like any other entry, and on the following call a new
 * level is pushed so that their contents are iterated next.
 *
 * Failures of opendir(), readdir(), closedir() and lstat() are
 * reported with warning() (ENOENT from opendir()/lstat() is silently
 * ignored, since the path may simply have disappeared) and the
 * affected directory or entry is skipped; they do not end the
 * iteration.
 */
int dir_iterator_advance(struct dir_iterator *dir_iterator)
{
	struct dir_iterator_int *iter =
		(struct dir_iterator_int *)dir_iterator;

	while (1) {
		struct dir_iterator_level *level =
			&iter->levels[iter->levels_nr - 1];
		struct dirent *de;

		if (!level->initialized) {
			/*
			 * First visit of this level: terminate the path
			 * with a separator, remember where entry names
			 * start, and open the directory.
			 *
			 * Note: dir_iterator_begin() ensures that
			 * path is not the empty string.
			 */
			if (!is_dir_sep(iter->base.path.buf[iter->base.path.len - 1]))
				strbuf_addch(&iter->base.path, '/');
			level->prefix_len = iter->base.path.len;

			level->dir = opendir(iter->base.path.buf);
			if (!level->dir && errno != ENOENT) {
				warning("error opening directory %s: %s",
					iter->base.path.buf, strerror(errno));
				/* Popping the level is handled below */
			}

			level->initialized = 1;
		} else if (S_ISDIR(iter->base.st.st_mode) &&
			   level->dir_state == DIR_STATE_ITER) {
			/*
			 * The directory was just iterated over; now
			 * prepare to iterate into it. The new level is
			 * set up on the next pass through the loop.
			 */
			level->dir_state = DIR_STATE_RECURSE;
			ALLOC_GROW(iter->levels, iter->levels_nr + 1,
				   iter->levels_alloc);
			/* ALLOC_GROW may have moved the array; re-fetch. */
			level = &iter->levels[iter->levels_nr++];
			level->initialized = 0;
			/* Never leave a stale or indeterminate stream pointer. */
			level->dir = NULL;
			continue;
		}
		/*
		 * Otherwise the current entry is either not a directory
		 * or has already been iterated over and into; move on to
		 * the next entry of this level.
		 */

		if (!level->dir) {
			/*
			 * This level is exhausted (or wasn't opened
			 * successfully); pop up a level.
			 */
			if (--iter->levels_nr == 0)
				return dir_iterator_abort(dir_iterator);

			continue;
		}

		/*
		 * Loop until we find an entry that we can give back
		 * to the caller:
		 */
		while (1) {
			strbuf_setlen(&iter->base.path, level->prefix_len);
			/* readdir() returns NULL both at EOF and on error. */
			errno = 0;
			de = readdir(level->dir);

			if (!de) {
				/* This level is exhausted; pop up a level. */
				if (errno)
					warning("error reading directory %s: %s",
						iter->base.path.buf, strerror(errno));

				/*
				 * Close the stream whether readdir() hit
				 * the end of the directory or failed;
				 * otherwise the stream would be leaked
				 * when the pointer is cleared below.
				 */
				if (closedir(level->dir))
					warning("error closing directory %s: %s",
						iter->base.path.buf, strerror(errno));

				level->dir = NULL;
				if (--iter->levels_nr == 0)
					return dir_iterator_abort(dir_iterator);
				break;
			}

			if (is_dot_or_dotdot(de->d_name))
				continue;

			strbuf_addstr(&iter->base.path, de->d_name);
			if (lstat(iter->base.path.buf, &iter->base.st) < 0) {
				if (errno != ENOENT)
					warning("error reading path '%s': %s",
						iter->base.path.buf,
						strerror(errno));
				continue;
			}

			/*
			 * We have to set these each time because
			 * the path strbuf might have been realloc()ed.
			 */
			iter->base.relative_path =
				iter->base.path.buf + iter->levels[0].prefix_len;
			iter->base.basename =
				iter->base.path.buf + level->prefix_len;
			level->dir_state = DIR_STATE_ITER;

			return ITER_OK;
		}
	}
}

/*
 * End the iteration and release everything owned by the iterator:
 * every directory stream still open on the level stack, the level
 * stack itself, the path buffer and the iterator struct. Always
 * returns ITER_DONE; a failing closedir() is reported with warning()
 * only. dir_iterator must not be used after this call.
 */
int dir_iterator_abort(struct dir_iterator *dir_iterator)
{
	struct dir_iterator_int *iter = (struct dir_iterator_int *)dir_iterator;

	while (iter->levels_nr) {
		struct dir_iterator_level *level =
			&iter->levels[--iter->levels_nr];

		/*
		 * A level that dir_iterator_advance() has not processed
		 * yet (e.g. when aborting right after
		 * dir_iterator_begin()) has never had its dir member
		 * assigned, so it must not be passed to closedir().
		 */
		if (!level->initialized || !level->dir)
			continue;

		if (closedir(level->dir)) {
			/* Keep closedir()'s errno across the path fix-up. */
			int saved_errno = errno;

			strbuf_setlen(&iter->base.path, level->prefix_len);
			warning("error closing directory %s: %s",
				iter->base.path.buf, strerror(saved_errno));
		}
	}

	free(iter->levels);
	strbuf_release(&iter->base.path);
	free(iter);
	return ITER_DONE;
}

/*
 * Create an iterator over the directory tree rooted at path.
 *
 * path must be a non-NULL, non-empty string; anything else is treated
 * as a programming error and die()s. The string is copied into the
 * iterator. The directory itself is not opened here; that happens on
 * the first call to dir_iterator_advance().
 *
 * The returned iterator is released by dir_iterator_advance() when the
 * iteration is exhausted, or by dir_iterator_abort().
 */
struct dir_iterator *dir_iterator_begin(const char *path)
{
	struct dir_iterator_int *iter;

	/* Validate the argument before allocating anything. */
	if (!path || !*path)
		die("BUG: empty path passed to dir_iterator_begin()");

	iter = xcalloc(1, sizeof(*iter));

	strbuf_init(&iter->base.path, PATH_MAX);
	strbuf_addstr(&iter->base.path, path);

	ALLOC_GROW(iter->levels, 10, iter->levels_alloc);

	/*
	 * Set up the level for the starting directory. The memory
	 * obtained through ALLOC_GROW() is not assumed to be zeroed,
	 * so give the members that dir_iterator_abort() may look at
	 * before the first dir_iterator_advance() a defined value.
	 */
	iter->levels_nr = 1;
	iter->levels[0].initialized = 0;
	iter->levels[0].dir = NULL;
	iter->levels[0].prefix_len = 0;

	return &iter->base;
}
87 changes: 87 additions & 0 deletions dir-iterator.h
@@ -0,0 +1,87 @@
#ifndef DIR_ITERATOR_H
#define DIR_ITERATOR_H

/*
* Iterate over a directory tree.
*
* Iterate over a directory tree, recursively, including paths of all
* types and hidden paths. Skip "." and ".." entries and don't follow
* symlinks except for the original path.
*
* Every time dir_iterator_advance() is called, update the members of
* the dir_iterator structure to reflect the next path in the
* iteration. The order that paths are iterated over within a
* directory is undefined, but directory paths are always iterated
* over before the subdirectory contents.
*
* A typical iteration looks like this:
*
* int ok;
* struct dir_iterator *iter = dir_iterator_begin(path);
*
* while ((ok = dir_iterator_advance(iter)) == ITER_OK) {
* if (want_to_stop_iteration()) {
* ok = dir_iterator_abort(iter);
* break;
* }
*
* // Access information about the current path:
* if (S_ISDIR(iter->st.st_mode))
* printf("%s is a directory\n", iter->relative_path);
* }
*
* if (ok != ITER_DONE)
* handle_error();
*
* Callers are allowed to modify iter->path while they are working,
* but they must restore it to its original contents before calling
* dir_iterator_advance() again.
*/

/*
 * The public view of a directory iteration. The members describe the
 * entry most recently produced by dir_iterator_advance() and are
 * overwritten by the next call.
 */
struct dir_iterator {
/*
 * The current path: the starting path passed to
 * dir_iterator_begin(), followed by the directory components and
 * name of the current entry.
 */
struct strbuf path;

/*
 * The current path relative to the starting path. This part
 * of the path always uses "/" characters to separate path
 * components. It points into path's buffer rather than being a
 * separate copy.
 */
const char *relative_path;

/*
 * The current basename (the last component of path). Like
 * relative_path, it points into path's buffer.
 */
const char *basename;

/*
 * The result of calling lstat() on path, so a symbolic link is
 * described as the link itself rather than its target.
 */
struct stat st;
};

/*
 * Start a directory iteration over path. Return a dir_iterator that
 * holds the internal state of the iteration.
 *
 * The iteration includes all paths under path, not including path
 * itself and not including "." or ".." entries.
 *
 * path is the starting directory. An internal copy will be made.
 * Passing NULL or an empty string is a bug and makes the function
 * die().
 */
struct dir_iterator *dir_iterator_begin(const char *path);

/*
 * Advance the iterator to the first or next item and return ITER_OK.
 * If the iteration is exhausted, free the dir_iterator and any
 * resources associated with it and return ITER_DONE. On error, free
 * dir_iterator and associated resources and return ITER_ERROR. It is
 * a bug to use iterator or call this function again after it has
 * returned ITER_DONE or ITER_ERROR.
 *
 * NOTE(review): the implementation in dir-iterator.c reports
 * opendir()/readdir()/lstat() failures with warning() and skips the
 * affected directory or entry; it has no code path that returns
 * ITER_ERROR. Callers should still handle ITER_ERROR as documented.
 */
int dir_iterator_advance(struct dir_iterator *iterator);

/*
 * End the iteration before it has been exhausted. Free the
 * dir_iterator and any associated resources and return ITER_DONE. On
 * error, free the dir_iterator and return ITER_ERROR.
 *
 * NOTE(review): the implementation in dir-iterator.c always returns
 * ITER_DONE; a failure to close a directory is only reported with
 * warning().
 */
int dir_iterator_abort(struct dir_iterator *iterator);

#endif

0 comments on commit 0fe5043

Please sign in to comment.