Skip to content

Commit

Permalink
Merge branch 'cn/bom-in-gitignore' into next
Browse files Browse the repository at this point in the history
Teach the codepaths that read .gitignore and .gitattributes files
that these files, when encoded in UTF-8, may have a UTF-8 BOM at the
beginning; this brings them in line with what we already do for
configuration files.

* cn/bom-in-gitignore:
  attr: skip UTF8 BOM at the beginning of the input file
  config: use utf8_bom[] from utf8.[ch] in git_parse_source()
  utf8-bom: introduce skip_utf8_bom() helper
  add_excludes_from_file: clarify the bom skipping logic
  dir: allow a BOM at the beginning of exclude files
  • Loading branch information
gitster committed Apr 21, 2015
2 parents 9a8b1e9 + 27547e5 commit f7d56f1
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 5 deletions.
9 changes: 7 additions & 2 deletions attr.c
Expand Up @@ -12,6 +12,7 @@
#include "exec_cmd.h"
#include "attr.h"
#include "dir.h"
#include "utf8.h"

const char git_attr__true[] = "(builtin)true";
const char git_attr__false[] = "\0(builtin)false";
Expand Down Expand Up @@ -379,8 +380,12 @@ static struct attr_stack *read_attr_from_file(const char *path, int macro_ok)
return NULL;
}
res = xcalloc(1, sizeof(*res));
while (fgets(buf, sizeof(buf), fp))
handle_attr_line(res, buf, path, ++lineno, macro_ok);
while (fgets(buf, sizeof(buf), fp)) {
char *bufp = buf;
if (!lineno)
skip_utf8_bom(&bufp, strlen(bufp));
handle_attr_line(res, bufp, path, ++lineno, macro_ok);
}
fclose(fp);
return res;
}
Expand Down
6 changes: 3 additions & 3 deletions config.c
Expand Up @@ -12,6 +12,7 @@
#include "quote.h"
#include "hashmap.h"
#include "string-list.h"
#include "utf8.h"

struct config_source {
struct config_source *prev;
Expand Down Expand Up @@ -417,16 +418,15 @@ static int git_parse_source(config_fn_t fn, void *data)
struct strbuf *var = &cf->var;

/* U+FEFF Byte Order Mark in UTF8 */
static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
const unsigned char *bomptr = utf8_bom;
const char *bomptr = utf8_bom;

for (;;) {
int c = get_next_char();
if (bomptr && *bomptr) {
/* We are at the file beginning; skip UTF8-encoded BOM
* if present. Sane editors won't put this in on their
* own, but e.g. Windows Notepad will do it happily. */
if ((unsigned char) c == *bomptr) {
if (c == (*bomptr & 0377)) {
bomptr++;
continue;
} else {
Expand Down
6 changes: 6 additions & 0 deletions dir.c
Expand Up @@ -12,6 +12,7 @@
#include "refs.h"
#include "wildmatch.h"
#include "pathspec.h"
#include "utf8.h"

struct path_simplify {
int len;
Expand Down Expand Up @@ -617,7 +618,12 @@ int add_excludes_from_file_to_list(const char *fname,
}

el->filebuf = buf;

if (skip_utf8_bom(&buf, size))
size -= buf - el->filebuf;

entry = buf;

for (i = 0; i < size; i++) {
if (buf[i] == '\n') {
if (entry != buf + i && entry[0] != '#') {
Expand Down
9 changes: 9 additions & 0 deletions t/t7061-wtstatus-ignore.sh
Expand Up @@ -20,6 +20,15 @@ test_expect_success 'status untracked directory with --ignored' '
test_cmp expected actual
'

# Check that a .gitignore starting with a UTF-8 BOM is still honoured:
# the file is written as the bytes 357 273 277 (octal) followed by the
# pattern "ignored", then the --ignored porcelain status is compared
# against the same "expected" file the preceding test compares with.
test_expect_success 'same with gitignore starting with BOM' '
printf "\357\273\277ignored\n" >.gitignore &&
mkdir -p untracked &&
: >untracked/ignored &&
: >untracked/uncommitted &&
git status --porcelain --ignored >actual &&
test_cmp expected actual
'

cat >expected <<\EOF
?? .gitignore
?? actual
Expand Down
11 changes: 11 additions & 0 deletions utf8.c
Expand Up @@ -633,3 +633,14 @@ int is_hfs_dotgit(const char *path)

return 1;
}

/* The byte order mark U+FEFF as it appears when encoded in UTF-8. */
const char utf8_bom[] = "\357\273\277";

/*
 * Step over a leading UTF-8 BOM, if there is one.
 *
 * "text" points at the caller's cursor into a buffer holding at least
 * "len" readable bytes.  When those bytes begin with utf8_bom, the
 * cursor is advanced past the marker and 1 is returned.  Otherwise the
 * cursor is left untouched and 0 is returned.  The buffer contents are
 * never modified.
 */
int skip_utf8_bom(char **text, size_t len)
{
	const size_t bom_len = strlen(utf8_bom);

	/* Check the length first so memcmp() never reads past the input. */
	if (len >= bom_len && !memcmp(*text, utf8_bom, bom_len)) {
		*text += bom_len;
		return 1;
	}
	return 0;
}
3 changes: 3 additions & 0 deletions utf8.h
Expand Up @@ -13,6 +13,9 @@ int same_encoding(const char *, const char *);
__attribute__((format (printf, 2, 3)))
int utf8_fprintf(FILE *, const char *, ...);

/* UTF-8 byte order mark as a NUL-terminated byte string; defined in utf8.c. */
extern const char utf8_bom[];
/*
 * If the buffer of the given length that the first argument points at
 * begins with utf8_bom, advance that pointer past the marker and
 * return 1; otherwise leave it unchanged and return 0.
 */
extern int skip_utf8_bom(char **, size_t);

void strbuf_add_wrapped_text(struct strbuf *buf,
const char *text, int indent, int indent2, int width);
void strbuf_add_wrapped_bytes(struct strbuf *buf, const char *data, int len,
Expand Down

0 comments on commit f7d56f1

Please sign in to comment.