Skip to content

Commit

Permalink
ref-filter: add mailmap support
Browse files Browse the repository at this point in the history
Add mailmap support to ref-filter to formats which are similar in
pretty. This support is such that the following pretty placeholders are
equivalent to the new ref-filter atoms

	%aN = authorname:mailmap
	%cN = committername:mailmap

	%aE = authoremail:mailmap
	%aL = authoremail:mailmap,localpart    (this order matters)
	%cE = committeremail:mailmap
	%cL = committeremail:mailmap,localpart (this order matters)

Additionally, mailmap can also be used with ":trim" option for email by
doing something like "authoremail:mailmap,trim" (this order matters).

The above also applies for the "tagger" atom, that is,
"taggername:mailmap", "taggeremail:mailmap", "taggeremail:mailmap,trim"
and "taggername:mailmap,localpart" (the order of options matters here
again).

Signed-off-by: Kousik Sanagavarapu <five231003@gmail.com>
  • Loading branch information
five-sh committed Aug 1, 2023
1 parent 1b8601e commit ee38de4
Showing 1 changed file with 77 additions and 8 deletions.
85 changes: 77 additions & 8 deletions ref-filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
#include "oid-array.h"
#include "repository.h"
#include "commit.h"
#include "mailmap.h"
#include "ident.h"
#include "remote.h"
#include "color.h"
#include "tag.h"
Expand Down Expand Up @@ -78,6 +80,11 @@ static struct ref_trailer_buf {
struct strbuf kvsepbuf;
} ref_trailer_buf = {STRING_LIST_INIT_NODUP, STRBUF_INIT, STRBUF_INIT};

static struct mailmap {
int use;
struct string_list list;
} mailmap = { 0, STRING_LIST_INIT_NODUP };

static struct expand_data {
struct object_id oid;
enum object_type type;
Expand Down Expand Up @@ -212,8 +219,12 @@ static struct used_atom {
struct {
enum { O_SIZE, O_SIZE_DISK } option;
} objectsize;
struct {
enum { NO_RAW, NO_MM } option;
} name_option;
struct email_option {
enum { EO_RAW, EO_TRIM, EO_LOCALPART } option;
enum { EO_RAW, EO_TRIM, EO_LOCALPART,
EO_MM_RAW, EO_MM_TRIM, EO_MM_LOCALPART } option;

This comment has been minimized.

Copy link
@chriscool

chriscool Aug 2, 2023

I would have thought that it would be enough to add EO_MM (or EO_MAILMAP to be clearer), instead of EO_MM_RAW, EO_MM_TRIM and EO_MM_LOCALPART. Isn't it possible already to both trim and only get the local part?

Maybe instead of a regular enum, an enum with bitfield would be better for this.

This is an example of such an enum from the top of ref-filter.h:

enum ref_sorting_order {
        REF_SORTING_REVERSE = 1<<0,
        REF_SORTING_ICASE = 1<<1,
        REF_SORTING_VERSION = 1<<2,
        REF_SORTING_DETACHED_HEAD_FIRST = 1<<3,
};

So perhaps something like:

enum {
	EO_RAW = 1<<0,
	EO_TRIM = 1<<1,
	EO_LOCALPART = 1<<2,
	EO_MAILMAP = 1<<3,
} option;

This comment has been minimized.

Copy link
@five-sh

five-sh Aug 2, 2023

Author Owner

Maybe instead of a regular enum, an enum with bitfield would be better for this.

Just for the sake of learning, what makes it more advantageous? I'm curious.

So perhaps something like:

enum {
	EO_RAW = 1<<0,
	EO_TRIM = 1<<1,
	EO_LOCALPART = 1<<2,
	EO_MAILMAP = 1<<3,
} option;

Thanks I'll make this change.

This comment has been minimized.

Copy link
@chriscool

chriscool Aug 2, 2023

With the email_atom_option_parser() function that I suggest below, having an enum with bitfields allows things like the following:

for (...) {
	int opt;

	/* Prepare and check arg */
 	...
	
	opt = email_atom_option_parser(&arg, err);
	if (opt < 0)
		return strbuf_addf_ret(err, -1, "%s: bad option %s", atom->name, arg);
	atom->u.email_option.option |= opt;

	...
}

Then processing the option can be done with code like:

	if (atom->u.email_option.option & EO_TRIM)
		strbuf_trim(&val_buf);

	if (atom->u.email_option.option & EO_MAILMAP)
		change_to_mailmap_value(&val_buf);

	if (atom->u.email_option.option & EO_LOCALPART)
		get_only_local_part(&val_buf);

In other words all the options can be stored in a single int, as each one uses only one bit of that int. And then the C bitwise operators like | and & can be used to operate on the separate bits.

See how REF_SORTING_VERSION or REF_SORTING_ICASE are used.

} email_option;
struct {
enum { S_BARE, S_GRADE, S_SIGNER, S_KEY,
Expand Down Expand Up @@ -528,6 +539,21 @@ static int oid_atom_parser(struct ref_format *format UNUSED,
return 0;
}

static int person_name_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
{
if (!arg) {
atom->u.name_option.option = NO_RAW;
} else if (!strcmp(arg, "mailmap")) {
mailmap.use = 1;
atom->u.name_option.option = NO_MM;
} else {
return err_bad_arg(err, atom->name, arg);
}
return 0;
}

static int person_email_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
Expand All @@ -538,6 +564,20 @@ static int person_email_atom_parser(struct ref_format *format UNUSED,
atom->u.email_option.option = EO_TRIM;
else if (!strcmp(arg, "localpart"))
atom->u.email_option.option = EO_LOCALPART;
else if (skip_prefix(arg, "mailmap", &arg)) {
mailmap.use = 1;
arg++;
if (!*arg)
atom->u.email_option.option = EO_MM_RAW;
else if (!strcmp(arg, "trim"))
atom->u.email_option.option = EO_MM_TRIM;
else if (!strcmp(arg, "localpart"))
atom->u.email_option.option = EO_MM_LOCALPART;
else
return strbuf_addf_ret(err, -1,
_("%s cannot be used with %s in %s"),
"mailmap", arg, atom->name);
}

This comment has been minimized.

Copy link
@chriscool

chriscool Aug 2, 2023

I think it might be better to have a separate helper function like:

static int email_atom_option_parser(const char **arg, struct strbuf *err)
{
	if (!skip_prefix(*arg, "trim", arg))
		return EO_TRIM;
	if (!skip_prefix(*arg, "localpart", arg))
		return EO_LOCALPART;
	if (skip_prefix(*arg, "mailmap", arg))
		return EO_MAILMAP;
	return -1;
}

and then call this function in a loop, like what we did for the describe atom.

This comment has been minimized.

Copy link
@five-sh

five-sh Aug 2, 2023

Author Owner

Yeah that way we can make the ordering of the options irrelavant too. I'll do this change.

else
return err_bad_arg(err, atom->name, arg);
return 0;
Expand Down Expand Up @@ -684,15 +724,15 @@ static struct {
[ATOM_TYPE] = { "type", SOURCE_OBJ },
[ATOM_TAG] = { "tag", SOURCE_OBJ },
[ATOM_AUTHOR] = { "author", SOURCE_OBJ },
[ATOM_AUTHORNAME] = { "authorname", SOURCE_OBJ },
[ATOM_AUTHORNAME] = { "authorname", SOURCE_OBJ, FIELD_STR, person_name_atom_parser },
[ATOM_AUTHOREMAIL] = { "authoremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
[ATOM_AUTHORDATE] = { "authordate", SOURCE_OBJ, FIELD_TIME },
[ATOM_COMMITTER] = { "committer", SOURCE_OBJ },
[ATOM_COMMITTERNAME] = { "committername", SOURCE_OBJ },
[ATOM_COMMITTERNAME] = { "committername", SOURCE_OBJ, FIELD_STR, person_name_atom_parser },
[ATOM_COMMITTEREMAIL] = { "committeremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
[ATOM_COMMITTERDATE] = { "committerdate", SOURCE_OBJ, FIELD_TIME },
[ATOM_TAGGER] = { "tagger", SOURCE_OBJ },
[ATOM_TAGGERNAME] = { "taggername", SOURCE_OBJ },
[ATOM_TAGGERNAME] = { "taggername", SOURCE_OBJ, FIELD_STR, person_name_atom_parser },
[ATOM_TAGGEREMAIL] = { "taggeremail", SOURCE_OBJ, FIELD_STR, person_email_atom_parser },
[ATOM_TAGGERDATE] = { "taggerdate", SOURCE_OBJ, FIELD_TIME },
[ATOM_CREATOR] = { "creator", SOURCE_OBJ },
Expand Down Expand Up @@ -1276,6 +1316,23 @@ static const char *find_wholine(const char *who, int wholen, const char *buf)
return "";
}

static const char *change_to_mailmap_value(const char *buf)
{
struct strbuf sb = STRBUF_INIT;
size_t buflen = strlen(buf);
const char *headers[] = { "author ", "committer ",
"tagger ", NULL };

if (!*buf)
return xstrdup("");

strbuf_attach(&sb, (void *)buf, buflen, buflen + 1);
read_mailmap(&mailmap.list);
apply_mailmap_to_header(&sb, headers, &mailmap.list);

return buf;
}

This comment has been minimized.

Copy link
@chriscool

chriscool Aug 2, 2023

I am not sure that it's correct to change buf like this. I think this function should return a new buffer.

This comment has been minimized.

Copy link
@chriscool

chriscool Aug 2, 2023

Another possibility is to make it modify an existing strbuf.


static const char *copy_line(const char *buf)
{
const char *eol;
Expand All @@ -1289,7 +1346,11 @@ static const char *copy_name(const char *buf)
{
const char *cp;

buf = strchr(buf, ' ') + 1; /* skip who */
if (mailmap.use)
change_to_mailmap_value(buf);
else
buf = strchr(buf, ' ') + 1; /* skip who */

for (cp = buf; *cp && *cp != '\n'; cp++) {
if (starts_with(cp, " <"))
return xmemdupz(buf, cp - buf);
Expand All @@ -1299,21 +1360,29 @@ static const char *copy_name(const char *buf)

static const char *copy_email(const char *buf, struct used_atom *atom)
{
const char *email = strchr(buf, '<');
const char *email;
const char *eoemail;

if (mailmap.use)
buf = change_to_mailmap_value(buf);

email = strchr(buf, '<');
if (!email)
return xstrdup("");
switch (atom->u.email_option.option) {
case EO_RAW:
case EO_MM_RAW:
eoemail = strchr(email, '>');
if (eoemail)
eoemail++;
break;
case EO_TRIM:
case EO_MM_TRIM:
email++;
eoemail = strchr(email, '>');
break;
case EO_LOCALPART:
case EO_MM_LOCALPART:
email++;
eoemail = strchr(email, '@');
if (!eoemail)
Expand Down Expand Up @@ -1400,7 +1469,7 @@ static void grab_person(const char *who, struct atom_value *val, int deref, void
if (strncmp(who, name, wholen))
continue;
if (name[wholen] != 0 &&
strcmp(name + wholen, "name") &&
!starts_with(name + wholen, "name") &&
!starts_with(name + wholen, "email") &&
!starts_with(name + wholen, "date"))
continue;
Expand All @@ -1410,7 +1479,7 @@ static void grab_person(const char *who, struct atom_value *val, int deref, void
return; /* no point looking for it */
if (name[wholen] == 0)
v->s = copy_line(wholine);
else if (!strcmp(name + wholen, "name"))
else if (starts_with(name + wholen, "name"))
v->s = copy_name(wholine);
else if (starts_with(name + wholen, "email"))
v->s = copy_email(wholine, &used_atom[i]);
Expand Down

1 comment on commit ee38de4

@chriscool
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

%aL = authoremail:mailmap,localpart (this order matters)

Why does the order matters? What happens if the order is changed? Isn't there a way to make authoremail:localpart,mailmap work in the same way?

Please sign in to comment.