Skip to content

Commit

Permalink
Merge branch 'ezpaarse-project:master' into micromedex
Browse files Browse the repository at this point in the history
  • Loading branch information
librarywebchic committed Aug 7, 2023
2 parents 5bdd7ed + 0f6070d commit 7dc978e
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 10 deletions.
14 changes: 14 additions & 0 deletions ben/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"longname": "Benjamins",
"name": "ben",
"describe": "Recognizes the accesses to the platform Benjamins",
"contact": "Sean Duffy",
"pkb": false,
"docurl": "http://analyses.ezpaarse.org/platforms/64ad733df42178dc451bcf6a",
"domains": [
"www.benjamins.com",
"benjamins.com"
],
"version": "2023-07-23",
"status": "beta"
}
47 changes: 47 additions & 0 deletions ben/parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env node

'use strict';
const Parser = require('../.lib/parser.js');

/**
* Recognizes the accesses to the platform Benjamins
* @param {Object} parsedUrl an object representing the URL to analyze
* main attributes: pathname, query, hostname
* @param {Object} ec an object representing the EC whose URL is being analyzed
* @return {Object} the result
*/
module.exports = new Parser(function analyseEC(parsedUrl, ec) {
let result = {};
let path = parsedUrl.pathname;
// uncomment this line if you need parameters
// let param = parsedUrl.query || {};

// use console.error for debuging
// console.error(parsedUrl);

let match;

if ((match = /^\/online\/([a-z]+)\/publications\/([0-9]+)$/i.exec(path)) !== null) {
// https://benjamins.com/online/etsb/publications/52963
result.rtype = 'RECORD';
result.mime = 'HTML';
result.db_id = match[1];
result.unitid = match[2];

} else if ((match = /^\/online\/([a-z]+)\/articles\/([a-z0-9]+)?(\.[a-z]+)?$/i.exec(path)) !== null) {
// https://www.benjamins.com/online/hts/articles/ada1
// https://benjamins.com/online/hts/articles/hyb1.fr
result.rtype = 'ARTICLE';
result.mime = 'HTML';
result.db_id = match[1];
result.title_id = match[1];
result.unitid = match[2];
} else if ((match = /^\/online\/([a-z]+)\/search$/i.exec(path)) !== null) {
// https://www.benjamins.com/online/hts/search?r=all&q=Adaption
result.rtype = 'SEARCH';
result.mime = 'HTML';
result.db_id = match[1];
}

return result;
});
5 changes: 5 additions & 0 deletions ben/test/ben.2023-07-23.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
out-db_id;out-title_id;out-unitid;out-rtype;out-mime;in-url
hts;hts;ada1;ARTICLE;HTML;https://www.benjamins.com/online/hts/articles/ada1
hts;hts;hyb1;ARTICLE;HTML;https://benjamins.com/online/hts/articles/hyb1.fr
hts;;;SEARCH;HTML;https://www.benjamins.com/online/hts/search?r=all&q=Adaption
etsb;;52963;RECORD;HTML;https://benjamins.com/online/etsb/publications/52963
26 changes: 22 additions & 4 deletions tair/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const Parser = require('../.lib/parser.js');
module.exports = new Parser(function analyseEC(parsedUrl) {
let result = {};
let path = parsedUrl.pathname;
let params = parsedUrl.query || {};
let match;

if ((match = /^\/(index).jsp$/.exec(path)) !== null) {
Expand All @@ -20,12 +21,29 @@ module.exports = new Parser(function analyseEC(parsedUrl) {
result.mime = 'HTML';
result.rtype = match[1].toUpperCase();

} else if ((match = /^\/servlets\/(Search)$/.exec(path)) !== null) {
// hhttp://www.arabidopsis.org.gate1.inist.fr/servlets/Search?type=general&action=new_search
result.rtype = match[1].toUpperCase();
} else if (/^\/servlets\/Search$/.test(path)) {
// http://www.arabidopsis.org.gate1.inist.fr/servlets/Search?type=general&action=new_search
result.rtype = 'SEARCH';
result.mime = 'HTML';

} else if (/^\/servlets\/TairObject$/.test(path)) {
// /servlets/TairObject?type=gene_class_symbol&id=1005832620
// /servlets/TairObject?type=transposon&id=1866
// /servlets/TairObject?type=locus&name=At1g44170
const type = params.type || '';
const id = params.id || params.name || '';

result.rtype = 'RECORD';
result.mime = 'HTML';
result.unitid = 'servlets/' + match[1];
result.unitid = `${type}/${id}`;

} else if ((match = /^\/download_files(\/.*)$/.exec(path)) !== null) {
// /download_files/Genes/Araport11_genome_release/Araport11_blastsets/Araport11_cds_20220914.gz
result.rtype = 'RECORD';
result.mime = 'MISC';
result.unitid = match[1];
}

return result;
});

16 changes: 10 additions & 6 deletions tair/test/tail.2014-05-19.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
out-unitid;out-mime;out-rtype;in-url
index;HTML;INDEX;http://www.arabidopsis.org.gate1.inist.fr/index.jsp
servlets/Search;HTML;SEARCH;http://www.arabidopsis.org.gate1.inist.fr/servlets/Search?type=general&action=new_search
tools/index;HTML;TOOLS;http://www.arabidopsis.org.gate1.inist.fr/tools/index.jsp
portals/index;HTML;PORTALS;http://www.arabidopsis.org.gate1.inist.fr/portals/index.jsp
download/index;HTML;DOWNLOAD;http://www.arabidopsis.org.gate1.inist.fr/download/index.jsp
submit/index;HTML;SUBMIT;http://www.arabidopsis.org.gate1.inist.fr/submit/index.jsp
index;HTML;INDEX;http://www.arabidopsis.org/index.jsp
;HTML;SEARCH;http://www.arabidopsis.org/servlets/Search?type=general&action=new_search
tools/index;HTML;TOOLS;http://www.arabidopsis.org/tools/index.jsp
portals/index;HTML;PORTALS;http://www.arabidopsis.org/portals/index.jsp
download/index;HTML;DOWNLOAD;http://www.arabidopsis.org/download/index.jsp
submit/index;HTML;SUBMIT;http://www.arabidopsis.org/submit/index.jsp
gene_class_symbol/1005832620;HTML;RECORD;https://www.arabidopsis.org/servlets/TairObject?type=gene_class_symbol&id=1005832620
transposon/1866;HTML;RECORD;https://www.arabidopsis.org/servlets/TairObject?type=transposon&id=1866
locus/At1g44170;HTML;RECORD;https://www.arabidopsis.org/servlets/TairObject?type=locus&name=At1g44170
/Genes/Araport11_genome_release/Araport11_blastsets/Araport11_cds_20220914.gz;MISC;RECORD;https://www.arabidopsis.org/download_files/Genes/Araport11_genome_release/Araport11_blastsets/Araport11_cds_20220914.gz

0 comments on commit 7dc978e

Please sign in to comment.