This repository has been archived by the owner on May 30, 2021. It is now read-only.

feat: add ArcGIS REST API fetch function #835

Closed
wants to merge 15 commits into from
2 changes: 1 addition & 1 deletion coronadatascraper-cache
5 changes: 5 additions & 0 deletions src/shared/lib/datetime/iso/index.js
@@ -6,7 +6,12 @@ import { now } from './now.js';
import { parse } from './parse.js';
import { looksLike } from './looks-like.js';

// This is here to avoid cache misses with Quentin's ArcGIS JSON Pagination
// This can presumably go away in li.
const ARCGIS_PAGINATION_DEPLOY_DATE = '2020-04-19';

export default {
ARCGIS_PAGINATION_DEPLOY_DATE,
dateIsBefore,
dateIsBeforeOrEqualTo,
getDate,
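For context, the scrapers updated below gate on this constant: scrape dates before the deploy date keep using the old fully-expanded query URL (so existing cache entries still match), while later dates go through the new paginating fetcher. A minimal sketch of that pattern, assuming it runs inside a scraper's `async scraper()` method and with `LEGACY_URL` standing in as a placeholder for the scraper's original query URL:

```js
// Sketch of the cache-compatibility gate used by the scrapers in this PR.
// LEGACY_URL is a placeholder for the old fully-expanded ArcGIS query URL.
const date = datetime.getYYYYMMDD(process.env.SCRAPE_DATE);

let attributes;
if (datetime.dateIsBefore(date, datetime.ARCGIS_PAGINATION_DEPLOY_DATE)) {
  // Old path: request the exact URL already present in the cache.
  const data = await fetch.json(this, LEGACY_URL, 'default');
  attributes = data.features.map(({ attributes }) => attributes);
} else {
  // New path: let the paginating helper walk the whole feature layer.
  attributes = await fetch.arcGISJSON(this, this.url, 'default', false);
}
```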
53 changes: 53 additions & 0 deletions src/shared/lib/fetch/index.js
@@ -292,3 +292,56 @@ export const getArcGISCSVURL = async function(scraper, serverNumber, dashboardId
const { orgId } = dashboardManifest;
return getArcGISCSVURLFromOrgId(scraper, serverNumber, orgId, layerName);
};

/**
* Retrieves data from an ArcGIS REST API. By default, it retrieves all items at the provided URL with geometry turned off.
* You can control the page size through the `featuresToFetch` parameter in `options`, but this should not be necessary;
* by default, this will make the largest request allowed by the source.
* @param {*} scraper the scraper object
* @param {string} featureLayerURL URL of the resource, up to and including the feature layer number and `query`, e.g.
* https://services5.arcgis.com/fsYDFeRKu1hELJJs/arcgis/rest/services/FOHM_Covid_19_FME_1/FeatureServer/1/query
* @param {string} cacheKey cache key for this resource; defaults to `default`
* @param {*} date the date associated with this resource, or false if it is timeseries data
* @param {object} options customizable options:
*   - featuresToFetch: number of features to request per page. A smaller number means more requests to grab the complete dataset;
*     a larger number may result in a partial dataset if it exceeds the layer's `Max Record Count`. If omitted, no
*     `resultRecordCount` is sent and the server returns its maximum allowed page size.
*   - additionalParams: additional query parameters for this request. Defaults to `where=0%3D0&outFields=*&returnGeometry=false`.
*   - alwaysRun: fetches from the URL even if the resource is in cache; defaults to false
*   - disableSSL: disables SSL verification for this resource; should be avoided
*/
export const arcGISJSON = async (scraper, featureLayerURL, cacheKey = 'default', date, options = {}) => {
const { featuresToFetch, additionalParams } = {
featuresToFetch: undefined,
additionalParams: 'where=0%3D0&outFields=*&returnGeometry=false',
...options
};

if (featureLayerURL.search(/\/query$/) === -1) {
throw new Error(`Invalid URL: "${featureLayerURL}" does not end with "query"`);
}

let url = `${featureLayerURL.replace(/\?.*$/, '')}?f=json${additionalParams ? `&${additionalParams}` : ''}`;

// Won't get anything back without these.
if (url.search('where=') === -1) url += '&where=0%3D0';
if (url.search('outFields=') === -1) url += '&outFields=*';

// Note also that if any query parameter appears twice, the server returns a 400.
const output = [];

let n = 0;
let fetchURL = `${url}&resultOffset=${n}`;
if (featuresToFetch) fetchURL += `&resultRecordCount=${featuresToFetch}`;
let response = await json(scraper, fetchURL, cacheKey, date, options);

while (response && response.features && response.features.length > 0) {
n += response.features.length;
output.push(...response.features.map(({ attributes }) => attributes));

fetchURL = `${url}&resultOffset=${n}`;
if (featuresToFetch) fetchURL += `&resultRecordCount=${featuresToFetch}`;

response = await json(scraper, fetchURL, cacheKey, date, options);
}

return output;
};
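A usage sketch (not part of the diff) for reviewers: inside a scraper's `async scraper()` method, the helper walks every page of the layer and returns the flattened `attributes` objects. The layer URL is the Swedish FOHM layer already used in this PR; the `featuresToFetch` value is only there to illustrate the option.

```js
// Sketch: fetch all features from an ArcGIS feature layer, page by page.
// Assumes `this` is a scraper object and `fetch` is src/shared/lib/fetch/index.js.
const rows = await fetch.arcGISJSON(
  this,
  'https://services5.arcgis.com/fsYDFeRKu1hELJJs/arcgis/rest/services/FOHM_Covid_19_FME_1/FeatureServer/1/query',
  'default', // cache key
  false, // timeseries data, no single date
  { featuresToFetch: 1000 } // optional: smaller pages mean more requests
);
console.log(`Fetched ${rows.length} features`);
```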
24 changes: 18 additions & 6 deletions src/shared/scrapers/JP/index.js
@@ -3,6 +3,7 @@ import assert from 'assert';
import * as fetch from '../../lib/fetch/index.js';
import * as transform from '../../lib/transform.js';
import maintainers from '../../lib/maintainers.js';
import datetime from '../../lib/datetime/index.js';

/**
* Hand rolled version of _.groupBy
@@ -45,13 +46,24 @@ const scraper = {
}
],
type: 'json',
url:
'https://services8.arcgis.com/JdxivnCyd1rvJTrY/arcgis/rest/services/v2_covid19_list_csv/FeatureServer/0/query?where=0%3D0&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson',
url: 'https://services8.arcgis.com/JdxivnCyd1rvJTrY/arcgis/rest/services/v2_covid19_list_csv/FeatureServer/0/query',
async scraper() {
const data = await fetch.json(this, this.url, 'default');
assert(data, 'No data fetched');
assert(data.features.length > 1, 'features are unreasonable');
const attributes = data.features.map(({ attributes }) => attributes);
const date = datetime.getYYYYMMDD(process.env.SCRAPE_DATE);
let attributes;
if (datetime.dateIsBefore(date, datetime.ARCGIS_PAGINATION_DEPLOY_DATE)) {
// FIXME: ugly hack to avoid cache misses. We should be able to remove this in li.
this.url =
'https://services8.arcgis.com/JdxivnCyd1rvJTrY/arcgis/rest/services/v2_covid19_list_csv/FeatureServer/0/query?where=0%3D0&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields=*&returnGeometry=false&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson';
const data = await fetch.json(this, this.url, 'default');
assert(data, 'No data fetched');
assert(data.features.length > 1, 'features are unreasonable');
attributes = data.features.map(({ attributes }) => attributes);
} else {
const features = await fetch.arcGISJSON(this, this.url, 'default', false);
assert(features, 'No data fetched');
assert(features.length > 1, 'features are unreasonable');
attributes = features;
}
assert(attributes.length > 1, 'data fetch failed, no attributes');

const groupedByPrefecture = groupBy(attributes, attribute => attribute.Prefecture);
18 changes: 14 additions & 4 deletions src/shared/scrapers/LT/index.js
@@ -1,13 +1,13 @@
import * as fetch from '../../lib/fetch/index.js';
import * as transform from '../../lib/transform.js';
import maintainers from '../../lib/maintainers.js';
import datetime from '../../lib/datetime/index.js';

import mapping from './mapping.json';

const scraper = {
country: 'iso1:LT',
url:
'https://services.arcgis.com/XdDVrnFqA9CT3JgB/arcgis/rest/services/covid_locations/FeatureServer/0/query?f=json&where=1%3D1&outFields=*&returnGeometry=false',
url: 'https://services.arcgis.com/XdDVrnFqA9CT3JgB/arcgis/rest/services/covid_locations/FeatureServer/0/query',
priority: 1,
type: 'csv',
sources: [
@@ -18,8 +18,18 @@ const scraper = {
],
maintainers: [maintainers.qgolsteyn],
async scraper() {
const casesRaw = await fetch.json(this, this.url, 'default');
const casesData = casesRaw.features.map(({ attributes }) => attributes);
const date = datetime.getYYYYMMDD(process.env.SCRAPE_DATE);

let casesData;
if (datetime.dateIsBefore(date, datetime.ARCGIS_PAGINATION_DEPLOY_DATE)) {
// FIXME: ugly hack to avoid cache misses. We should be able to remove this in li.
this.url =
'https://services.arcgis.com/XdDVrnFqA9CT3JgB/arcgis/rest/services/covid_locations/FeatureServer/0/query?f=json&where=1%3D1&outFields=*&returnGeometry=false';
const casesRaw = await fetch.json(this, this.url, 'default');
casesData = casesRaw.features.map(({ attributes }) => attributes);
} else {
casesData = await fetch.arcGISJSON(this, this.url, 'default', false);
}

const casesByRegion = {};

18 changes: 14 additions & 4 deletions src/shared/scrapers/LV/index.js
@@ -1,12 +1,12 @@
import * as fetch from '../../lib/fetch/index.js';
import maintainers from '../../lib/maintainers.js';
import datetime from '../../lib/datetime/index.js';

import mapping from './mapping.json';

const scraper = {
country: 'iso1:LV',
url:
'https://services7.arcgis.com/g8j6ESLxQjUogx9p/arcgis/rest/services/Latvia_covid_novadi/FeatureServer/0/query?f=json&where=1%3D1&outFields=*&returnGeometry=false',
url: 'https://services7.arcgis.com/g8j6ESLxQjUogx9p/arcgis/rest/services/Latvia_covid_novadi/FeatureServer/0/query',
priority: 1,
type: 'csv',
maintainers: [maintainers.qgolsteyn],
@@ -17,8 +17,18 @@ const scraper = {
}
],
async scraper() {
const casesRaw = await fetch.json(this, this.url, 'default');
const casesData = casesRaw.features.map(({ attributes }) => attributes);
const date = datetime.getYYYYMMDD(process.env.SCRAPE_DATE);

let casesData;
if (datetime.dateIsBefore(date, datetime.ARCGIS_PAGINATION_DEPLOY_DATE)) {
// FIXME: ugly hack to avoid cache misses. We should be able to remove this in li.
this.url =
'https://services7.arcgis.com/g8j6ESLxQjUogx9p/arcgis/rest/services/Latvia_covid_novadi/FeatureServer/0/query?f=json&where=1%3D1&outFields=*&returnGeometry=false';
const casesRaw = await fetch.json(this, this.url, 'default');
casesData = casesRaw.features.map(({ attributes }) => attributes);
} else {
casesData = await fetch.arcGISJSON(this, this.url, 'default', false);
}

const data = [];

16 changes: 9 additions & 7 deletions src/shared/scrapers/LV/mapping.json
@@ -4,7 +4,6 @@
"Gulbenes novads": "iso2:LV-033",
"Smiltenes novads": "iso2:LV-094",
"Alūksnes novads": "iso2:LV-007",
"Ventspils": "iso2:LV-VEN",
"Saulkrastu novads": "iso2:LV-089",
"Talsu novads": "iso2:LV-097",
"Sējas novads": "iso2:LV-090",
@@ -26,7 +25,6 @@
"Ropažu novads": "iso2:LV-080",
"Baltinavas novads": "iso2:LV-014",
"Kandavas novads": "iso2:LV-043",
"Jūrmala": "iso2:LV-JUR",
"Cesvaines novads": "iso2:LV-021",
"Madonas novads": "iso2:LV-059",
"Babītes novads": "iso2:LV-012",
@@ -56,7 +54,6 @@
"Iecavas novads": "iso2:LV-034",
"Brocēnu novads": "iso2:LV-018",
"Ozolnieku novads": "iso2:LV-069",
"Jelgava": "iso2:LV-JEL",
"Rēzeknes novads": "iso2:LV-077",
"Dobeles novads": "iso2:LV-026",
"Krustpils novads": "iso2:LV-049",
@@ -69,7 +66,6 @@
"Vaiņodes novads": "iso2:LV-100",
"Vecumnieku novads": "iso2:LV-105",
"Jaunjelgavas novads": "iso2:LV-038",
"Jēkabpils": "iso2:LV-JKB",
"Nīcas novads": "iso2:LV-066",
"Jēkabpils novads": "iso2:LV-042",
"Neretas novads": "iso2:LV-065",
@@ -86,7 +82,6 @@
"Grobiņas novads": "iso2:LV-032",
"Riebiņu novads": "iso2:LV-078",
"Zilupes novads": "iso2:LV-110",
"Daugavpils": "iso2:LV-DGV",
"Bauskas novads": "iso2:LV-016",
"Rucavas novads": "iso2:LV-081",
"Līvānu novads": "iso2:LV-056",
@@ -110,10 +105,17 @@
"Engures novads": "iso2:LV-029",
"Amatas novads": "iso2:LV-008",
"Tērvetes novads": "iso2:LV-098",
"Valmiera": "iso2:LV-VMR",
"Mērsraga novads": "iso2:LV-063",
"Pļaviņu novads": "iso2:LV-072",
"Cēsu novads": "iso2:LV-022",
"Mazsalacas novads": "iso2:LV-060",
"Jūrmala": "iso2:LV-JUR",
"Jelgava": "iso2:LV-JEL",
"Jēkabpils": "iso2:LV-JKB",
"Daugavpils": "iso2:LV-DGV",
"Valmiera": "iso2:LV-VMR",
"Rīga": "iso2:LV-RIX",
"Mazsalacas novads": "iso2:LV-060"
"Ventspils": "iso2:LV-VEN",
"Liepāja": "iso2:LV-LPX",
"Rēzekne": "iso2:LV-REZ"
}
27 changes: 3 additions & 24 deletions src/shared/scrapers/PA/index.js
@@ -152,27 +152,6 @@ function sum(dataArray, key) {
return result;
}

async function TEMPfetchArcGISJSON(obj, featureURL, date) {
// temporary handling of pagination here until Quentin's pull request is brought in
let offset = 0;
const recordCount = 50000;
const result = [];
// eslint-disable-next-line no-constant-condition
while (true) {
const query = `where=0%3D0&outFields=*&resultOffset=${offset}&resultRecordCount=${recordCount}&f=json`;
const theURL = `${featureURL}query?${query}`;
const cacheKey = `arcGISJSON_cases_${offset}`;
const response = await fetch.json(obj, theURL, cacheKey, date);
if (!response) throw new Error(`Response was null for "${theURL}`);
if (response.features && response.features.length === 0) break;
const n = response.features.length;
log(`${n} records from "${theURL}`);
offset += n;
result.push(...response.features.map(({ attributes }) => attributes));
}
return result;
}

const scraper = {
priority: 1,
country: 'iso1:PA',
@@ -194,7 +173,7 @@ const scraper = {

// List of cases, this has most of the data that we want.
_caseListFeatureURL:
'https://services5.arcgis.com/aqOddbAz6HewRw8I/ArcGIS/rest/services/Casos_Covid19_PA/FeatureServer/0/',
'https://services5.arcgis.com/aqOddbAz6HewRw8I/ArcGIS/rest/services/Casos_Covid19_PA/FeatureServer/0/query',

// Time series at national level.
_timeSeriesUrl: 'https://opendata.arcgis.com/datasets/6b7f17658fd845058f7516d6fc591530_0.csv',
@@ -287,10 +266,10 @@ const scraper = {
// use datetime.old here, just like the caching system does.
if (datetime.dateIsBefore(scrapeDate, datetime.old.getDate())) {
// treat the data as a timeseries, so don't cache it.
caseList = await TEMPfetchArcGISJSON(this, this._caseListFeatureURL, false);
caseList = await fetch.arcGISJSON(this, this._caseListFeatureURL, 'default', false);
} else {
// fetch it the normal way so it gets cached.
caseList = await TEMPfetchArcGISJSON(this, this._caseListFeatureURL);
caseList = await fetch.arcGISJSON(this, this._caseListFeatureURL, 'default');
}
// Array of:
// {
15 changes: 11 additions & 4 deletions src/shared/scrapers/SE/index.js
@@ -7,17 +7,24 @@ import mapping from './mapping.json';

const scraper = {
country: 'iso1:SE',
url:
'https://services5.arcgis.com/fsYDFeRKu1hELJJs/arcgis/rest/services/FOHM_Covid_19_FME_1/FeatureServer/1/query?f=json&where=1%3D1&outFields=*&returnGeometry=false',
url: 'https://services5.arcgis.com/fsYDFeRKu1hELJJs/arcgis/rest/services/FOHM_Covid_19_FME_1/FeatureServer/1/query',
priority: 1,
type: 'csv',
maintainers: [maintainers.qgolsteyn],
sources: [{ url: 'https://folkhalsomyndigheten.se', name: 'Public Health Agency of Sweden' }],
async scraper() {
const date = datetime.getYYYYMMDD(process.env.SCRAPE_DATE);

const casesRaw = await fetch.json(this, this.url, 'default', false);
const casesData = casesRaw.features.map(({ attributes }) => attributes);
let casesData;
if (datetime.dateIsBefore(date, datetime.ARCGIS_PAGINATION_DEPLOY_DATE)) {
// FIXME: ugly hack to avoid cache misses. We should be able to remove this in li.
this.url =
'https://services5.arcgis.com/fsYDFeRKu1hELJJs/arcgis/rest/services/FOHM_Covid_19_FME_1/FeatureServer/1/query?f=json&where=1%3D1&outFields=*&returnGeometry=false';
const casesRaw = await fetch.json(this, this.url, 'default', false);
casesData = casesRaw.features.map(({ attributes }) => attributes);
} else {
casesData = await fetch.arcGISJSON(this, this.url, 'default', false);
}

const casesByRegion = {};

40 changes: 20 additions & 20 deletions src/shared/scrapers/SE/tests/expected.2020-04-13.json
@@ -1,24 +1,24 @@
[
{ "state": "iso2:SE-K", "cases": 41 },
{ "state": "iso2:SE-W", "cases": 345 },
{ "state": "iso2:SE-K", "cases": 42 },
{ "state": "iso2:SE-W", "cases": 365 },
{ "state": "iso2:SE-I", "cases": 16 },
{ "state": "iso2:SE-X", "cases": 290 },
{ "state": "iso2:SE-N", "cases": 185 },
{ "state": "iso2:SE-Z", "cases": 131 },
{ "state": "iso2:SE-F", "cases": 350 },
{ "state": "iso2:SE-H", "cases": 70 },
{ "state": "iso2:SE-G", "cases": 98 },
{ "state": "iso2:SE-BD", "cases": 137 },
{ "state": "iso2:SE-M", "cases": 449 },
{ "state": "iso2:SE-AB", "cases": 4575 },
{ "state": "iso2:SE-D", "cases": 668 },
{ "state": "iso2:SE-C", "cases": 465 },
{ "state": "iso2:SE-S", "cases": 77 },
{ "state": "iso2:SE-AC", "cases": 183 },
{ "state": "iso2:SE-Y", "cases": 105 },
{ "state": "iso2:SE-U", "cases": 346 },
{ "state": "iso2:SE-O", "cases": 1064 },
{ "state": "iso2:SE-X", "cases": 301 },
{ "state": "iso2:SE-N", "cases": 188 },
{ "state": "iso2:SE-Z", "cases": 137 },
{ "state": "iso2:SE-F", "cases": 355 },
{ "state": "iso2:SE-H", "cases": 76 },
{ "state": "iso2:SE-G", "cases": 103 },
{ "state": "iso2:SE-BD", "cases": 154 },
{ "state": "iso2:SE-M", "cases": 458 },
{ "state": "iso2:SE-AB", "cases": 4709 },
{ "state": "iso2:SE-D", "cases": 694 },
{ "state": "iso2:SE-C", "cases": 478 },
{ "state": "iso2:SE-S", "cases": 82 },
{ "state": "iso2:SE-AC", "cases": 187 },
{ "state": "iso2:SE-Y", "cases": 111 },
{ "state": "iso2:SE-U", "cases": 368 },
{ "state": "iso2:SE-O", "cases": 1101 },
{ "state": "iso2:SE-T", "cases": 383 },
{ "state": "iso2:SE-E", "cases": 970 },
{ "deaths": 906, "cases": 10948, "hospitalized": 859 }
{ "state": "iso2:SE-E", "cases": 1004 },
{ "deaths": 1215, "cases": 11312, "hospitalized": 935 }
]
@@ -0,0 +1 @@
{"objectIdFieldName":"OBJECTID","uniqueIdField":{"name":"OBJECTID","isSystemMaintained":true},"globalIdFieldName":"","features":[]}