Permalink
Browse files

Adding simple support for major Israeli newspapers (Haaretz, TheMarker, Ynet)
  • Loading branch information...
eranroz committed Aug 15, 2015
1 parent 0ac84b5 commit 1f6ccfefa102605b7e09b73abaea9247832b120d
Showing with 342 additions and 0 deletions.
  1. +147 −0 Haaretz.js
  2. +105 −0 TheMarker.js
  3. +90 −0 Ynet.js
View
@@ -0,0 +1,147 @@
{
"translatorID": "d6f64d96-aa6f-4fd3-816f-bdef842c7088",
"label": "Haaretz",
"creator": "Eran Rosenthal",
"target": "^https?://www\\.haaretz\\.(co\\.il|com)/",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2015-08-17 22:01:10"
}
/**
Copyright (c) 2015 Eran Rosenthal
This program is free software: you can redistribute it and/or
modify it under the terms of the GNU Affero General Public License
as published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public
License along with this program. If not, see
<http://www.gnu.org/licenses/>.
*/
/**
 * Report whether the page looks like a Haaretz article.
 *
 * @param {Document} doc - document that the XPath lookup runs against
 * @param {String} url - page address (not consulted here)
 * @returns {String|undefined} 'newspaperArticle' when the '//header//h1'
 *   lookup yields non-empty text, otherwise undefined
 */
function detectWeb(doc, url) {
	var headline = ZU.xpathText(doc, '//header//h1');
	return headline ? 'newspaperArticle' : undefined;
}
/**
 * Build a newspaperArticle item from a Haaretz article page and complete it.
 *
 * Title, abstract, authors, publication date and keywords are read from the
 * page via XPath. The date and the keywords are optional: when the page does
 * not provide them the item is still saved, just without those fields.
 *
 * @param {Document} doc - document that the XPath lookups run against
 * @param {String} url - page address; stored on the item and used to pick
 *   the edition (English vs. Hebrew)
 */
function doWeb(doc, url) {
	var item = new Zotero.Item('newspaperArticle');
	item.title = ZU.xpathText(doc, '//header//h1');
	item.url = url;

	// URLs containing haaretz.com are the English edition; anything else is
	// treated as the Hebrew edition.
	if (url.indexOf('haaretz.com') !== -1) {
		item.publicationTitle = 'Haaretz';
		item.language = 'en';
	}
	else {
		item.publicationTitle = 'הארץ';
		item.language = 'he';
	}

	// Prefer the sub-headline in the article header, fall back to og:description.
	var abstract = ZU.xpathText(doc, '//header/p');
	if (!abstract) abstract = ZU.xpathText(doc, '//meta[@property="og:description"]/@content');
	item.abstractNote = abstract;

	var authors = ZU.xpath(doc, '//address/a[@rel="author"]');
	for (var i = 0; i < authors.length; i++) {
		item.creators.push(ZU.cleanAuthor(authors[i].textContent, 'author'));
	}

	// The lookup may come back empty; calling split() on it unguarded would
	// throw and abort the whole save.
	var published = ZU.xpathText(doc, '//time[@itemprop="datePublished"]/@datetime');
	if (published) {
		// Keep only the calendar date of an ISO-style "date T time" value.
		item.date = published.split('T')[0];
	}

	var keywordList = ZU.xpathText(doc, '//head/meta[@name="news_keywords"]/@content');
	if (keywordList) {
		var keywords = keywordList.split(',');
		for (var j = 0; j < keywords.length; j++) {
			// Trim before testing so whitespace-only entries do not become empty tags.
			var tag = keywords[j].trim();
			if (tag) item.tags.push(tag);
		}
	}

	item.complete();
}
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://www.haaretz.com/news/diplomacy-defense/1.671202",
"items": [
{
"itemType": "newspaperArticle",
"title": "Islamic Jihad: If Hunger Striker Dies, We'll Respond With Force Against Israel",
"creators": [
{
"firstName": "Jack",
"lastName": "Khoury",
"creatorType": "author"
},
{
"firstName": "Shirly",
"lastName": "Seidler",
"creatorType": "author"
},
{
"firstName": "Ido",
"lastName": "Efrati",
"creatorType": "author"
}
],
"date": "2015-08-14",
"abstractNote": "Islamic Jihad says it will no longer be committed to maintaining calm if Mohammed Allaan, who lost consciousness after 60-day hunger strike, dies.",
"language": "en",
"libraryCatalog": "Haaretz",
"publicationTitle": "Haaretz",
"shortTitle": "Islamic Jihad",
"url": "http://www.haaretz.com/news/diplomacy-defense/1.671202",
"attachments": [],
"tags": [
"Palestinian hunger strike"
],
"notes": [],
"seeAlso": []
}
]
},
{
"type": "web",
"url": "http://www.haaretz.co.il/news/politics/1.2708080",
"items": [
{
"itemType": "newspaperArticle",
"title": "פלסטיני דקר חייל ופצע אותו באורח קל בכביש 443 סמוך לבית חורון",
"creators": [
{
"firstName": "גילי",
"lastName": "כהן",
"creatorType": "author"
},
{
"firstName": "עמירה",
"lastName": "הס",
"creatorType": "author"
}
],
"date": "2015-08-15",
"abstractNote": "כוח צה\"ל שהיה במקום פתח באש לעבר הפלסטיני ופצע אותו באורח קל, והוא נעצר. החייל והדוקר פונו לבית החולים שערי צדק. בתחילת השבוע נדקר באזור צעיר ישראלי נוסף שנפצע בינוני",
"language": "he",
"libraryCatalog": "Haaretz",
"publicationTitle": "הארץ",
"url": "http://www.haaretz.co.il/news/politics/1.2708080",
"attachments": [],
"tags": [
"טרור",
"פיגוע",
"פלסטינים",
"צה\"ל"
],
"notes": [],
"seeAlso": []
}
]
}
]
/** END TEST CASES **/
View
@@ -0,0 +1,105 @@
{
"translatorID": "b2d61bb5-5b21-41b7-9c83-1abcbf14639b",
"label": "TheMarker",
"creator": "Eran Rosenthal",
"target": "^https?://www\\.themarker\\.com/",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2015-08-17 22:04:49"
}
/**
Copyright (c) 2015 Eran Rosenthal
This program is free software: you can redistribute it and/or
modify it under the terms of the GNU Affero General Public License
as published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public
License along with this program. If not, see
<http://www.gnu.org/licenses/>.
*/
/**
 * Report whether the page looks like a TheMarker article.
 *
 * @param {Document} doc - document that the XPath lookup runs against
 * @param {String} url - page address (not consulted here)
 * @returns {String|undefined} "magazineArticle" when the '//header//h1'
 *   lookup yields non-empty text, otherwise undefined
 */
function detectWeb(doc, url) {
	// No headline in the header: not something this translator handles.
	if (!ZU.xpathText(doc, '//header//h1')) {
		return undefined;
	}
	return "magazineArticle";
}
/**
 * Build a magazineArticle item from a TheMarker article page and complete it.
 *
 * Title, abstract, authors, publication date and keywords are read from the
 * page via XPath, and the document itself is attached under the title
 * "TheMarker". The date and the keywords are optional: when the page does not
 * provide them the item is still saved, just without those fields.
 *
 * @param {Document} doc - document that the XPath lookups run against; also
 *   used as the attachment
 * @param {String} url - page address, stored on the item
 */
function doWeb(doc, url) {
	// Declared locally; the undeclared assignment used before leaked a global
	// and is an error in strict code.
	var newItem = new Zotero.Item("magazineArticle");
	newItem.url = url;
	newItem.publicationTitle = "TheMarker";
	newItem.title = ZU.xpathText(doc, '//header//h1');

	// Prefer the sub-headline in the article header, fall back to og:description.
	var abstract = ZU.xpathText(doc, '//header/p');
	if (!abstract) abstract = ZU.xpathText(doc, '//meta[@property="og:description"]/@content');
	newItem.abstractNote = abstract;

	// The lookup may come back empty; calling split() on it unguarded would
	// throw and abort the whole save.
	var published = ZU.xpathText(doc, '//time[@itemprop="datePublished"]/@datetime');
	if (published) {
		// Keep only the calendar date of an ISO-style "date T time" value.
		newItem.date = published.split('T')[0];
	}

	var authors = ZU.xpath(doc, '//address/a[@rel="author"]');
	for (var i = 0; i < authors.length; i++) {
		newItem.creators.push(ZU.cleanAuthor(authors[i].textContent, 'author'));
	}

	var keywordList = ZU.xpathText(doc, '//head/meta[@name="news_keywords"]/@content');
	if (keywordList) {
		var keywords = keywordList.split(',');
		for (var j = 0; j < keywords.length; j++) {
			// Trim before testing so whitespace-only entries do not become empty tags.
			var tag = keywords[j].trim();
			if (tag) newItem.tags.push(tag);
		}
	}

	newItem.attachments = [{
		document: doc,
		title: "TheMarker"
	}];
	newItem.complete();
}
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://www.themarker.com/markerweek/thisweek/1.2707370",
"items": [
{
"itemType": "magazineArticle",
"title": "השופט גרוסקופף מציג: הבובות של נוחי דנקנר",
"creators": [
{
"firstName": "גיא",
"lastName": "רולניק",
"creatorType": "author"
}
],
"date": "2015-08-15",
"abstractNote": "כאשר במשק יש ריכוזי כוח כלכלי ופוליטי, לאיש אין עניין לצעוק שהמלך הוא עירום, ורוב האנשים יעדיפו לשכנע את עצמם שאלה בגדי המלך החדשים והיפים",
"libraryCatalog": "TheMarker",
"publicationTitle": "TheMarker",
"shortTitle": "השופט גרוסקופף מציג",
"url": "http://www.themarker.com/markerweek/thisweek/1.2707370",
"attachments": [
{
"title": "TheMarker"
}
],
"tags": [
"ריכוזיות"
],
"notes": [],
"seeAlso": []
}
]
}
]
/** END TEST CASES **/
View
90 Ynet.js
@@ -0,0 +1,90 @@
{
"translatorID": "7f45c3f9-e387-4589-9679-225ddcf6f00e",
"label": "Ynet",
"creator": "Eran Rosenthal",
"target": "^https?://www\\.ynet\\.co\\.il/articles/",
"minVersion": "3.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2015-08-15 17:25:20"
}
/**
Copyright (c) 2015 Eran Rosenthal
This program is free software: you can redistribute it and/or
modify it under the terms of the GNU Affero General Public License
as published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public
License along with this program. If not, see
<http://www.gnu.org/licenses/>.
*/
/**
 * Report the item type for a Ynet page.
 *
 * No inspection of the page is done: every page this function is called for
 * is reported as a newspaper article.
 *
 * @param {Document} doc - page document (not consulted here)
 * @param {String} url - page address (not consulted here)
 * @returns {String} always 'newspaperArticle'
 */
function detectWeb(doc, url) {
	var itemType = 'newspaperArticle';
	return itemType;
}
/**
 * Build a newspaperArticle item from a Ynet article page and complete it.
 *
 * The title comes from og:title, the abstract from the article sub-title
 * (falling back to og:description), the author from the link(s) in the
 * article header footer, and the date from the vr:published_time meta tag.
 *
 * @param {Document} doc - document that the XPath lookups run against
 * @param {String} url - page address, stored on the item
 */
function doWeb(doc, url) {
	var item = new Zotero.Item('newspaperArticle');
	item.title = ZU.xpathText(doc, '//meta[@property="og:title"]/@content');
	item.publicationTitle = 'Ynet';
	item.url = url;
	item.language = 'he';

	// Sub-title first; og:description only when the sub-title is missing or empty.
	var summary = ZU.xpathText(doc, '//div[@class="art_header_sub_title"]');
	item.abstractNote = summary
		? summary
		: ZU.xpathText(doc, '//meta[@property="og:description"]/@content');

	var byline = ZU.xpathText(doc, '//div[@class="art_header_footer"]//a');
	if (byline) {
		item.creators.push(Zotero.Utilities.cleanAuthor(byline, 'author'));
	}

	// The meta value is expected to end in DD.MM.YY; rebuild it as 20YY-MM-DD.
	// When the pattern does not match, the date is simply left unset.
	var publishedRaw = ZU.xpathText(doc, '//meta[@property="vr:published_time"]/@content');
	var parts = /([0-9]{2})\.([0-9]{2})\.([0-9]{2})$/.exec(publishedRaw);
	if (parts) {
		var day = parts[1];
		var month = parts[2];
		var year = '20' + parts[3];
		item.date = year + '-' + month + '-' + day;
	}

	item.complete();
}
/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://www.ynet.co.il/articles/0,7340,L-4690772,00.html",
"items": [
{
"itemType": "newspaperArticle",
"title": "תעלומת הקצין מארה\"ב, הסודות והמאהבת",
"creators": [
{
"firstName": "ירון",
"lastName": "דרוקמן",
"creatorType": "author"
}
],
"date": "2015-08-15",
"abstractNote": "הכותרות בישרו השבוע לפני 15 שנים על פרשת ריגול מהסרטים: אמריקני-יהודי שהתגייר, בא לישראל עם חומר סודי ביותר ומטריף את הממשל מדאגה. חברתו העידה שהסתובב עם פאה ושפם והתקשר אליה מטלפונים ציבוריים לסניף הדואר. בסוף מצאו אותו במצפה רמון, והאמת התבררה. בערך",
"language": "he",
"libraryCatalog": "Ynet",
"publicationTitle": "Ynet",
"url": "http://www.ynet.co.il/articles/0,7340,L-4690772,00.html",
"attachments": [],
"tags": [],
"notes": [],
"seeAlso": []
}
]
}
]
/** END TEST CASES **/

0 comments on commit 1f6ccfe

Please sign in to comment.