Skip to content

Commit

Permalink
Merge pull request zotero#2263 from adam3smith/JSTOR-update
Browse files Browse the repository at this point in the history
Fix PDFs for chapters & search
  • Loading branch information
adam3smith committed Oct 14, 2020
2 parents 299b34a + 83d7f41 commit 7900fbe
Showing 1 changed file with 66 additions and 24 deletions.
90 changes: 66 additions & 24 deletions JSTOR.js
Expand Up @@ -9,7 +9,7 @@
"inRepository": true,
"translatorType": 4,
"browserSupport": "gcsibv",
"lastUpdated": "2019-12-22 17:00:53"
"lastUpdated": "2020-10-14 13:32:58"
}

/*
Expand Down Expand Up @@ -64,7 +64,10 @@ function detectWeb(doc, url) {

function getSearchResults(doc, checkOnly) {
var resultsBlock = doc.querySelectorAll('.media-body.media-object-section');
if (!resultsBlock) return false;
if (!resultsBlock.length) {
resultsBlock = doc.querySelectorAll('.search-result-item-grid');
}
if (!resultsBlock.length) return false;
var items = {}, found = false;
for (let i = 0; i < resultsBlock.length; i++) {
let title = text(resultsBlock[i], '.title, .small-heading').trim();
Expand Down Expand Up @@ -157,6 +160,7 @@ function processRIS(text, jid) {
var review = text.match(/^RI\s+-\s+(.+)/m);
// sometimes we have subtitles stored in T1. These are part of the title, we want to add them later
var subtitle = text.match(/^T1\s+-\s+(.+)/m);
var maintitle = text.match(/^TI\s+-\s+(.+)/m);
translator.setString(text);
translator.setHandler("itemDone", function (obj, item) {
// author names are not (always) supplied as lastName, firstName in RIS
Expand All @@ -180,27 +184,27 @@ function processRIS(text, jid) {
}
// Don't save HTML snapshot from 'UR' tag
item.attachments = [];
var pdfurl = attr('a.pdfLink', 'href');
if (/stable\/(\d+)/.test(item.url)) {
pdfurl = "/stable/pdfplus/" + jid + ".pdf?acceptTC=true";
}
if (pdfurl) {
// not currently using but that's where the PDF link is
// var pdfurl = attr('a[data-qa="download-pdf"]', 'href');
// Books don't have PDFs
if (/stable\/([a-z0-9.]+)/.test(item.url) & item.itemType != "book") {
let pdfurl = "/stable/pdfplus/" + jid + ".pdf?acceptTC=true";
item.attachments.push({
url: pdfurl,
title: "JSTOR Full Text PDF",
mimeType: "application/pdf"
});
}

if (item.ISSN) {
item.ISSN = ZU.cleanISSN(item.ISSN);
}

// Only the DOIs mentioned in RIS are valid, and we don't
// add any other jid for DOI because they are only internal.

if (subtitle) {
item.title = item.title + ": " + subtitle[1];
if (maintitle && subtitle) {
item.title = maintitle[1] + ": " + subtitle[1];
}
// reviews don't have titles in RIS - we get them from the item page
if (!item.title && review) {
Expand All @@ -221,26 +225,73 @@ function processRIS(text, jid) {
item.title = "Review of " + reviewedTitle;
}

// titles may also contain escape characters
item.title = convertCharRefs(item.title);
item.url = item.url.replace('http:', 'https:'); // RIS still lists http addresses while JSTOR's stable URLs use https
if (item.url && !item.url.startsWith("http")) item.url = "https://" + item.url;
item.complete();

// DB in RIS maps to archive; we don't want that
delete item.archive;
if (item.DOI || /DOI: 10\./.test(item.extra)) {
finalizeItem(item);
}
else {
item.complete();
}
});

translator.getTranslatorObject(function (trans) {
trans.doImport();
});
}

/**
 * Validate the item's DOI against the doi.org "doiRA" lookup, strip the DOI
 * if the lookup says it does not exist (or the response cannot be parsed),
 * and then complete the item.
 *
 * The DOI is taken from item.DOI when present; otherwise from a
 * "DOI: 10.…" line in item.extra. item.complete() is called from the
 * doGet "done" callback, or immediately when there is no DOI to validate.
 *
 * @param {Object} item - Item being saved; must provide complete().
 */
function finalizeItem(item) {
	const extraDOIPattern = /DOI: (10\..+)/;

	let doi = item.DOI;
	if (!doi && item.extra) {
		let extraMatch = item.extra.match(extraDOIPattern);
		if (extraMatch) doi = extraMatch[1];
	}
	if (!doi) {
		// Nothing to validate; don't leave the item hanging
		item.complete();
		return;
	}

	// Drop the DOI from wherever it was found (dedicated field or Extra line)
	let removeDOI = function () {
		if (item.DOI) {
			delete item.DOI;
		}
		else {
			// String.prototype.replace returns a new string, so the result
			// has to be assigned back for the removal to take effect
			item.extra = item.extra.replace(/DOI: 10\..+\n?/, "");
		}
	};

	Zotero.debug("Validating DOI " + doi);
	// This just returns two lines of JSON
	ZU.doGet('https://doi.org/doiRA/' + encodeURIComponent(doi),
		function (text) {
			let isValid = false;
			try {
				let ra = JSON.parse(text);
				isValid = !!ra[0] && ra[0].status !== "DOI does not exist";
				if (!isValid) {
					Zotero.debug("DOI " + doi + " does not exist");
				}
			}
			catch (e) {
				Zotero.debug("Could not parse JSON. Probably invalid DOI");
			}
			if (!isValid) {
				removeDOI();
			}
		},
		function () {
			item.complete();
		}
	);
}

/** BEGIN TEST CASES **/
var testCases = [
{
"type": "web",
"url": "http://www.jstor.org/action/doBasicSearch?Query=chicken&Search.x=0&Search.y=0&wc=on",
"defer": true,
"items": "multiple"
},
{
"type": "web",
"url": "https://www.jstor.org/stable/1593514?&Search=yes&searchText=chicken&list=hide&searchUri=%2Faction%2FdoBasicSearch%3FQuery%3Dchicken%26Search.x%3D0%26Search.y%3D0%26wc%3Don&prevSearch=&item=1&ttl=70453&returnArticleService=showFullText",
"url": "https://www.jstor.org/stable/1593514?Search=yes&searchText=chicken&list=hide&searchUri=%2Faction%2FdoBasicSearch%3FQuery%3Dchicken%26Search.x%3D0%26Search.y%3D0%26wc%3Don&prevSearch=&item=1&ttl=70453&returnArticleService=showFullText#metadata_info_tab_contents",
"items": [
{
"itemType": "journalArticle",
Expand All @@ -265,7 +316,6 @@ var testCases = [
"date": "2004",
"ISSN": "0005-2086",
"abstractNote": "A reproducible and original method for the preparation of chicken intestine epithelial cells from 18-day-old embryos for long-term culture was obtained by using a mechanical isolation procedure, as opposed to previous isolation methods using relatively high concentrations of trypsin, collagenase, or EDTA. Chicken intestine epithelial cells typically expressed keratin and chicken E-cadherin, in contrast to chicken embryo fibroblasts, and they increased cell surface MHC II after activation with crude IFN-γ containing supernatants, obtained from chicken spleen cells stimulated with concanavalin A or transformed by reticuloendotheliosis virus. Eimeria tenella was shown to be able to develop until the schizont stage after 46 hr of culture in these chicken intestinal epithelial cells, but it was not able to develop further. However, activation with IFN-γ containing supernatants resulted in strong inhibition of parasite replication, as shown by incorporation of [3 H]uracil. Thus, chicken enterocytes, which are the specific target of Eimeria development in vivo, could be considered as potential local effector cells involved in the protective response against this parasite. /// Se desarrolló un método reproducible y original para la preparación de células epiteliales de intestino de embriones de pollo de 18 días de edad para ser empleadas como cultivo primario de larga duración. Las células epiteliales de intestino fueron obtenidas mediante un procedimiento de aislamiento mecánico, opuesto a métodos de aislamientos previos empleando altas concentraciones de tripsina, colagenasa o EDTA. Las células epiteliales de intestino expresaron típicamente keratina y caderina E, a diferencia de los fibroblastos de embrión de pollo, e incrementaron el complejo mayor de histocompatibilidad tipo II en la superficie de la célula posterior a la activación con sobrenadantes de interferón gamma. 
Los sobrenadantes de interferón gamma fueron obtenidos a partir de células de bazos de pollos estimuladas con concanavalina A o transformadas con el virus de reticuloendoteliosis. Se observó el desarrollo de la Eimeria tenella hasta la etapa de esquizonte después de 46 horas de cultivo en las células intestinales epiteliales de pollo pero no se observó un desarrollo posterior. Sin embargo, la activación de los enterocitos con los sobrenadantes con interferón gamma resultó en una inhibición fuerte de la replicación del parásito, comprobada mediante la incorporación de uracilo [3 H]. Por lo tanto, los enterocitos de pollo, blanco específico del desarrollo in vivo de la Eimeria, podrían ser considerados como células efectoras locales, involucradas en la respuesta protectora contra este parásito.",
"archive": "JSTOR",
"issue": "3",
"libraryCatalog": "JSTOR",
"pages": "617-624",
Expand Down Expand Up @@ -378,7 +428,6 @@ var testCases = [
],
"date": "1998",
"ISSN": "0036-0341",
"archive": "JSTOR",
"issue": "2",
"libraryCatalog": "JSTOR",
"pages": "310-311",
Expand Down Expand Up @@ -424,7 +473,6 @@ var testCases = [
"date": "2005",
"ISSN": "0742-1222",
"abstractNote": "From the social network perspective, this study explores the ontological structure of knowledge sharing activities engaged in by researchers in the field of information systems (IS) over the past three decades. We construct a knowledge network based on coauthorship patterns extracted from four major journals in the IS field in order to analyze the distinctive characteristics of each subfield and to assess the amount of internal and external knowledge exchange that has taken place among IS researchers. This study also tests the role of different types of social capital that influence the academic impact of researchers. Our results indicate that the proportion of coauthored IS articles in the four journals has doubled over the past 25 years, from merely 40 percent in 1978 to over 80 percent in 2002. However, a significant variation exists in terms of the shape, density, and centralization of knowledge exchange networks across the four subfields of IS—namely, behavioral science, organizational science, computer science, and economic science. For example, the behavioral science subgroup, in terms of internal cohesion among researchers, tends to develop the most dense collaborative relationships, whereas the computer science subgroup is the most fragmented. Moreover, external collaboration across these subfields appears to be limited and severely unbalanced. Across the four subfields, on average, less than 20 percent of the research collaboration ties involved researchers from different subdisciplines. Finally, the regression analysis reveals that knowledge capital derived from a network rich in structural holes has a positive influence on an individual researcher's academic performance.",
"archive": "JSTOR",
"issue": "3",
"libraryCatalog": "JSTOR",
"pages": "265-292",
Expand All @@ -447,6 +495,7 @@ var testCases = [
{
"type": "web",
"url": "http://www.jstor.org/action/doBasicSearch?Query=%28solomon+criminal+justice%29+AND+disc%3A%28slavicstudies-discipline+OR+history-discipline%29&prq=%28criminal+justice%29+AND+disc%3A%28slavicstudies-discipline+OR+history-discipline%29&hp=25&acc=on&wc=on&fc=off&so=rel&racc=off",
"defer": true,
"items": "multiple"
},
{
Expand Down Expand Up @@ -543,17 +592,11 @@ var testCases = [
],
"date": "2012",
"abstractNote": "Siddharth Kara's <i>Sex Trafficking</i> has become a critical resource for its revelations into an unconscionable business, and its detailed analysis of the trade's immense economic benefits and human cost. This volume is Kara's second, explosive study of slavery, this time focusing on the deeply entrenched and wholly unjust system of bonded labor. Drawing on eleven years of research in India, Nepal, Bangladesh, and Pakistan, Kara delves into an ancient and ever-evolving mode of slavery that ensnares roughly six out of every ten slaves in the world and generates profits that exceeded $17.6 billion in 2011. In addition to providing a thorough economic, historical, and legal overview of bonded labor, Kara travels to the far reaches of South Asia, from cyclone-wracked southwestern Bangladesh to the Thar desert on the India-Pakistan border, to uncover the brutish realities of such industries as hand-woven-carpet making, tea and rice farming, construction, brick manufacture, and frozen-shrimp production. He describes the violent enslavement of millions of impoverished men, women, and children who toil in the production of numerous products at minimal cost to the global market. He also follows supply chains directly to Western consumers, vividly connecting regional bonded labor practices to the appetites of the world. Kara's pioneering analysis encompasses human trafficking, child labor, and global security, and he concludes with specific initiatives to eliminate the system of bonded labor from South Asia once and for all.",
"archive": "JSTOR",
"libraryCatalog": "JSTOR",
"publisher": "Columbia University Press",
"shortTitle": "Bonded Labor",
"url": "https://www.jstor.org/stable/10.7312/kara15848",
"attachments": [
{
"title": "JSTOR Full Text PDF",
"mimeType": "application/pdf"
}
],
"attachments": [],
"tags": [],
"notes": [],
"seeAlso": []
Expand All @@ -576,7 +619,6 @@ var testCases = [
],
"date": "1988",
"abstractNote": "What do long-distance travelers gain from their voyages, especially when faraway lands are regarded as the source of esoteric knowledge? Mary Helms explains how various cultures interpret space and distance in cosmological terms, and why they associate political power with information about strange places, peoples, and things. She assesses the diverse goals of travelers, be they Hindu pilgrims in India, Islamic scholars of West Africa, Navajo traders, or Tlingit chiefs, and discusses the most extensive experience of longy2Ddistance contact on record--that between Europeans and native peoples--and the clash of cultures that arose from conflicting expectations about the \"faraway.\". The author describes her work as \"especially concerned with the political and ideological contexts or auras within which long-distance interests and activities may be conducted .. Not only exotic materials but also intangible knowledge of distant realms and regions can be politically valuable `goods,' both for those who have endured the perils of travel and for those sedentary homebodies who are able to acquire such knowledge by indirect means and use it for political advantage.\" Originally published in 1988. ThePrinceton Legacy Libraryuses the latest print-on-demand technology to again make available previously out-of-print books from the distinguished backlist of Princeton University Press. These paperback editions preserve the original texts of these important books while presenting them in durable paperback editions. The goal of the Princeton Legacy Library is to vastly increase access to the rich scholarly heritage found in the thousands of books published by Princeton University Press since its founding in 1905.",
"archive": "JSTOR",
"libraryCatalog": "JSTOR",
"publisher": "Princeton University Press",
"shortTitle": "Ulysses' Sail",
Expand All @@ -591,6 +633,7 @@ var testCases = [
{
"type": "web",
"url": "http://www.jstor.org/action/doAdvancedSearch?q3=&re=on&q4=&f3=all&c3=AND&group=none&q1=&f5=all&c5=AND&la=&q2=&c6=AND&sd=&c2=AND&c1=AND&pt=&acc=off&q6=&q5=&c4=AND&f6=all&f0=all&q0=%22Reading+Rousseau+in+the+nuclear+age%22&f4=all&ed=&f2=all&f1=all&isbn=",
"defer": true,
"items": "multiple"
},
{
Expand All @@ -610,7 +653,6 @@ var testCases = [
"date": "1989",
"ISSN": "0024-2519",
"abstractNote": "Bibliographic references are an accepted part of scholarly publication. As such, they have been used for information retrieval, studies of scientific communication, collection development decisions, and even determination of salary raises, as well as for their primary purpose of documentation of authors' claims. However, there appears to be a high percentage of errors in these citations, seen in evidence from the mid-nineteenth century to the present. Such errors can be traced to a lack of standardization in citation formats, misunderstanding of foreign languages, general human inabilities to reproduce long strings of information correctly, and failure to examine the document cited, combined with a general lack of training in the norms of citation. The real problem, the failure to detect and correct citation errors, is due to a diffusion of responsibility in the publishing process.",
"archive": "JSTOR",
"issue": "4",
"libraryCatalog": "JSTOR",
"pages": "291-304",
Expand Down

0 comments on commit 7900fbe

Please sign in to comment.