Skip to content

Commit

Permalink
Decode URI components in DOI when possible
Browse files Browse the repository at this point in the history
Also, attempt to drop a trailing bracket or parenthesis when there is an
unclosed opening one before the DOI, so that the previously skipped tests can be enabled and pass.

Fixes: zotero/zotero#3218
  • Loading branch information
abaevbog committed Sep 6, 2023
1 parent 4ad1eec commit b634347
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 6 deletions.
18 changes: 14 additions & 4 deletions test/tests/utilitiesTest.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,23 @@ describe("Zotero.Utilities", function() {
assert.equal(cleanDOI(`Foo bar ${doi}. Foo bar`), doi);
});

// FIXME
it.skip("should parse a DOI in parentheses", function () {
// Percent-encoded angle brackets inside free text: the expected result has
// %3C and %3E turned back into "<" and ">".
it("should parse a DOI with encoded < and >", function () {
const encodedUri = "10.1002/1096-9128(200005)12:6%3C375::AID-CPE480%3E3.0.CO;2-M";
const expected = "10.1002/1096-9128(200005)12:6<375::AID-CPE480>3.0.CO;2-M";
// The sentence-ending "." after the DOI must not be part of the result.
assert.equal(cleanDOI(`Foo bar ${encodedUri}. Foo bar`), expected);
});

// Same encoded DOI, this time written as a doi.org URL embedded in a sentence;
// the expected result is the bare DOI without the URL prefix.
it("should parse a DOI with url encoded params", function () {
const encodedUri = "https://doi.org/10.1002/1096-9128(200005)12:6%3C375::AID-CPE480%3E3.0.CO;2-M";
const expected = "10.1002/1096-9128(200005)12:6<375::AID-CPE480>3.0.CO;2-M";
// NOTE(review): the string passed in starts with "Foo bar", not with "https:",
// so cleanDOI's start-anchored /^https?:/ check does not match this input —
// confirm whether a case with the URL at the very start is also wanted.
assert.equal(cleanDOI(`Foo bar ${encodedUri}. Foo bar`), expected);
});

// A DOI wrapped in parentheses: the ")" that closes the surrounding "(" must
// not be returned as part of the DOI. `doi` is defined outside the lines shown here.
it("should parse a DOI in parentheses", function () {
assert.equal(cleanDOI(`Foo bar (${doi}) foo bar`), doi);
});

// FIXME
it.skip("should parse a DOI in brackets", function () {
// A DOI wrapped in square brackets: the "]" that closes the surrounding "[" must
// not be returned as part of the DOI. `doi` is defined outside the lines shown here.
it("should parse a DOI in brackets", function () {
assert.equal(cleanDOI(`Foo bar [${doi}] foo bar`), doi);
});
});
Expand Down
32 changes: 30 additions & 2 deletions utilities.js
Original file line number Diff line number Diff line change
Expand Up @@ -482,9 +482,37 @@ var Utilities = {
// Body of cleanDOI as shown in this hunk (the function header is outside the
// visible lines): finds the first DOI-like substring in x and returns it, or
// returns null when nothing matches. Throws if x is not a string.
if(typeof(x) != "string") {
throw new Error("cleanDOI: argument must be a string");
}

// If it's a url, decode it
// The check is anchored at the start of x, so a URL that appears after other
// text is not decoded by this branch.
// NOTE(review): decodeURIComponent throws URIError on a malformed percent
// sequence (e.g. a lone "%") and the call is not guarded here — confirm
// whether callers are prepared for that.
if (x.match(/^https?:/)) {
x = decodeURIComponent(x);
}
// Even if it's not a URL decode %3C followed by %3E as < >
// Applies only when the first "%3C" occurs before the first "%3E"; when it
// does, every "%3C" and "%3E" in x is replaced. The comparison is
// case-sensitive, so lowercase "%3c"/"%3e" are left as they are.
if (x.indexOf("%3C") < x.indexOf("%3E") && x.indexOf("%3C") >= 0) {
x = x.replace(/%3C/g, "<");
x = x.replace(/%3E/g, ">");
}
// "10", optionally "." plus four or more digits, then "/" and a run of
// non-whitespace characters whose last character is not "." or ",".
var doi = x.match(/10(?:\.[0-9]{4,})?\/[^\s]*[^\s\.,]/);
// NOTE(review): the next line looks like the pre-change return that the diff
// view lists as removed; read literally it would return before the bracket
// handling below — confirm against the actual file.
return doi ? doi[0] : null;
if (!doi) {
return null;
}
var result = doi[0];

// Check if the DOI ends with a bracket
// Only the final character of the match is inspected.
const trailingBracket = result.slice(-1);
if ([']', ')', '}'].includes(trailingBracket)) {
// Check the portion of the string before the matched DOI for an unclosed bracket
const beforeDOI = x.slice(0, doi.index);
const openingBracket = {
']': '[',
')': '(',
'}': '{'
}[trailingBracket];
// lastIndexOf returns -1 when the character is absent, so this holds when an
// opener of the same type appears before the DOI with no matching closer after it.
if (beforeDOI.lastIndexOf(openingBracket) > beforeDOI.lastIndexOf(trailingBracket)) {
// Remove the trailing bracket from the DOI
result = result.slice(0, -1);
}
}
return result;
},

/**
Expand Down

0 comments on commit b634347

Please sign in to comment.