Skip to content

Commit

Permalink
feat(thesesfr): updated parser for SPA
Browse files Browse the repository at this point in the history
  • Loading branch information
oxypomme committed Jul 18, 2024
1 parent 177f1a6 commit 824a972
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 11 deletions.
31 changes: 26 additions & 5 deletions thesesfr/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,13 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) {

let match;

if ((match = /^\/api\/v1\/document\/([0-9a-z]+)$/i.exec(path)) !== null) {
if ((match = /^\/api\/v1\/document\/(([0-9]{4})([0-9a-z]{4})[0-9a-z]+)$/i.exec(path)) !== null) {
// /api/v1/document/2019LYSE2053
// /api/v1/document/2010AIX22039
result.rtype = 'PHD_THESIS';
result.unitid = match[1];
result.publication_date = match[2];
result.institution_code = match[3];
switch (ec.status) {
case 200:
result.mime = 'PDF';
Expand All @@ -38,26 +41,44 @@ module.exports = new Parser(function analyseEC(parsedUrl, ec) {
break;
}

} else if ((match = /^\/([0-9]{8}[0-9X])$/i.exec(path)) !== null) {
// /258987731
} else if (
(match = /^\/([0-9]{8}[0-9X])$/i.exec(path)) !== null
|| (match = /^\/api\/v1\/personnes\/personne\/([0-9]{8}[0-9X])$/i.exec(path)) !== null
) {
// /264066944
// /api/v1/personnes/personne/264066944
result.rtype = 'BIO';
result.mime = 'HTML';
result.unitid = match[1];
result.ppn = match[1];

} else if ((match = /^\/(s[0-9]+)$/i.exec(path)) !== null) {
} else if (
(match = /^\/(s[0-9]+)$/i.exec(path)) !== null
|| (match = /^\/api\/v1\/theses\/these\/(s[0-9]+)$/i.exec(path)) !== null
) {
// /s366354
// /api/v1/theses/these/s383095
result.rtype = 'ABS';
result.mime = 'HTML';
result.unitid = match[1];

} else if ((match = /^\/(([0-9]{4})([a-z]{4})[0-9a-z]+)$/i.exec(path)) !== null) {
} else if (
(match = /^\/(([0-9]{4})([0-9a-z]{4})[0-9a-z]+)$/i.exec(path)) !== null
|| (match = /^\/api\/v1\/theses\/these\/(([0-9]{4})([0-9a-z]{4})[0-9a-z]+)$/i.exec(path)) !== null
) {
// /2023UPASP097
// /api/v1/theses/these/2024BORD0122
result.rtype = 'ABS';
result.mime = 'HTML';
result.unitid = match[1];
result.publication_date = match[2];
result.institution_code = match[3];

} else if (/^\/api\/v1\/theses\/recherche\/$/i.test(path)) {
// /api/v1/theses/recherche/?q=test&debut=0&nombre=10&tri=pertinence
result.rtype = 'SEARCH';
result.mime = 'HTML';

}

return result;
Expand Down
6 changes: 0 additions & 6 deletions thesesfr/test/thesesfr.2024-01-09.csv

This file was deleted.

10 changes: 10 additions & 0 deletions thesesfr/test/thesesfr.2024-07-18.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
out-ppn;out-unitid;out-rtype;out-mime;out-publication_date;out-institution_code;in-url
;2019LYSE2053;PHD_THESIS;MISC;2019;LYSE;https://theses.fr/api/v1/document/2019LYSE2053
;2010AIX22039;PHD_THESIS;MISC;2010;AIX2;https://theses.fr/api/v1/document/2010AIX22039
264066944;264066944;BIO;HTML;;;https://theses.fr/264066944
264066944;264066944;BIO;HTML;;;https://theses.fr/api/v1/personnes/personne/264066944
;s383095;ABS;HTML;;;https://theses.fr/s383095
;s383095;ABS;HTML;;;https://theses.fr/api/v1/theses/these/s383095
;2024BORD0122;ABS;HTML;2024;BORD;https://theses.fr/2024BORD0122
;2024BORD0122;ABS;HTML;2024;BORD;https://theses.fr/api/v1/theses/these/2024BORD0122
;;SEARCH;HTML;;;https://theses.fr/api/v1/theses/recherche/?q=test&debut=0&nombre=10&tri=pertinence

0 comments on commit 824a972

Please sign in to comment.