diff --git a/index.js b/index.js index a0e837e..85ec4e0 100644 --- a/index.js +++ b/index.js @@ -21,27 +21,39 @@ const moment = require('moment') const path = require('path') const fs = require('fs-extra') const parser = require('xml-js') -const util = require('util') +const exec = require('child-process-promise').exec +const tmp = require('tmp-promise') const COLUMN_SEPARATOR = ';' function deriveCsvFilePath(csvBasePath, type, timestamp, stationId) { let formatString + let contentType let extension + let subdir if (type === 'REPORT') { formatString = 'YYYYMMDD' - extension = 'BEOB' + contentType = 'BEOB' + extension = '.csv' + subdir = 'poi' + } else if (type === 'MOSMIX_KMZ'){ + formatString = 'YYYYMMDDHH' + contentType = 'MOSMIX' + extension = '.kml' + subdir = 'mos' } else { formatString = 'YYYYMMDDHH' - extension = 'MOSMIX' + contentType = 'MOSMIX' + extension = '.csv' + subdir = 'poi' } const dayDateTimeString = moment.utc(timestamp).format(formatString) - const fileName = stationId + '-' + extension + '.csv' + const fileName = stationId + '-' + contentType + extension - return path.join(csvBasePath, dayDateTimeString, fileName) + return path.join(csvBasePath, subdir, dayDateTimeString, fileName) } function parseCsvFile(fileContent) { @@ -210,65 +222,133 @@ async function readTimeseriesDataReport(csvBasePath, startTimestamp, endTimestam return result } -async function readTimeseriesDataMosmix(csvBasePath, startTimestamp, stationId) { - // TODO: ensure that not only the 6 o'clock-run is used but the others as well - let dayTimestamp = moment.utc(startTimestamp).startOf('day').add(6, 'hours').valueOf() - - const filePath = deriveCsvFilePath(csvBasePath, 'MOSMIX', dayTimestamp, stationId) - const fileContent = await fs.readFile(filePath, { - encoding: 'utf8' - }) +async function readTimeseriesDataMosmix(mosmixBasePath, startTimestamp, stationId) { + let dayTimestamp + let filePath + let fileContent + let partialTimeseries + let result = {} - const result = {} - const partialTimeseries = parseCsvFile(fileContent) - const timestamps = partialTimeseries['timestamp'] - _.forEach(partialTimeseries, (values, key) => { - if (key === 'timestamp') { - return - } + if (startTimestamp < moment.utc([2018, 8, 12,]).valueOf()) { + // TODO: ensure that not only the 6 o'clock-run is used but the others as well + dayTimestamp = moment.utc(startTimestamp).startOf('day').add(6, 'hours').valueOf() + filePath = deriveCsvFilePath(mosmixBasePath, 'MOSMIX', dayTimestamp, stationId) + fileContent = await fs.readFile(filePath, { encoding: 'utf8' }) + partialTimeseries = parseCsvFile(fileContent) - if (_.isNil(result[key])) { - result[key] = [] - } + const timestamps = partialTimeseries['timestamp'] + _.forEach(partialTimeseries, (values, key) => { + if (key === 'timestamp') { + return + } - _.forEach(values, (value, index) => { - result[key].push({ - timestamp: timestamps[index], - value: value + _.forEach(values, (value, index) => { + // Perform conversion to new format + switch (key) { + case 'TT': + let newKeyTT = 'TTT' + if (_.isNil(result[newKeyTT])) { + result[newKeyTT] = [] + } + result[newKeyTT].push({ + timestamp: timestamps[index], + value: value + 273.15 // °C to K + }) + break + case 'PPPP': + if (_.isNil(result[key])) { + result[key] = [] + } + result[key].push({ + timestamp: timestamps[index], + value: value * 100 // hPa to Pa + }) + break + case 'Td': + if (_.isNil(result[key])) { + result[key] = [] + } + result[key].push({ + timestamp: timestamps[index], + value: value + 273.15 // °C to K + }) + break + case 'ff': + let newKeyff = 'FF' + if (_.isNil(result[newKeyff])) { + result[newKeyff] = [] + } + result[newKeyff].push({ + timestamp: timestamps[index], + value: value / 3.6 // km/h to m/s + }) + break + case 'dd': + let newKeydd = 'DD' + if (_.isNil(result[newKeydd])) { + result[newKeydd] = [] + } + result[newKeydd].push({ + timestamp: timestamps[index], + value: value + }) + break + // ..unless nothing has changed, which has to be found by manually + // comparing MetElementDefinition.xml to the headings inside a .csv + default: + if (_.isNil(result[key])) { + result[key] = [] + } + result[key].push({ + timestamp: timestamps[index], + value: value + }) + } }) }) - }) - _.forEach(result, (item, key) => { - result[key] = _.sortBy(item, (item) => { - return item.timestamp + _.forEach(result, (item, key) => { + result[key] = _.sortBy(item, (item) => { + return item.timestamp + }) }) - }) + } else { + dayTimestamp = moment.utc(startTimestamp).startOf('day').add(3, 'hours').valueOf() + filePath = deriveCsvFilePath(mosmixBasePath, 'MOSMIX_KMZ', dayTimestamp, stationId) + const fileContent = await fs.readFile(filePath, { encoding: 'utf8' }) + // console.log('fileContent: ', fileContent) + result = await parseKmzFile(fileContent) + } + return result } async function main() { let stationId = '01001' // Jan Mayen - let startTimestamp = moment([2018, 8, 12]).valueOf() // now, UNIX EPOCH in ms resolution + let startTimestamp = moment.utc([2018, 8, 12]).valueOf() + let startTimestampCSV = moment.utc([2018, 8, 11]).valueOf() let basePath = '/home/moritz/tmp/crawler/weather/local_forecasts' - let csvBasePath = path.join(basePath, 'poi') + // let csvBasePath = path.join(basePath, 'poi') + // let mosBasePath = path.join(basePath, 'mos') let csvFile = path.join(basePath, 'poi', '2018091106', '01001-MOSMIX.csv') // let kmzFile = path.join(basePath, '2018091103', '01001-MOSMIX.kmz') - let kmzFile = path.join(basePath, 'mos', '2018091103', 'MOSMIX_L_2018091103_01001.kml') + let kmzFile = path.join(basePath, 'mos', '2018091203', '01001-MOSMIX.kml') console.log(csvFile) console.log(kmzFile) const exists = await fs.pathExists(kmzFile) && await fs.pathExists(csvFile) if (!exists) { - exit(1) + console.log('files do not exist, I am outta here') + process.exit(1) } - let kmzFileXML = await fs.readFile(kmzFile, 'utf8') - let resultKMZ = await parseKmzFile(kmzFileXML) + // let kmzFileXML = await fs.readFile(kmzFile, 'utf8') + // let resultKMZ = await parseKmzFile(kmzFileXML) + let resultKMZ = await readTimeseriesDataMosmix(basePath, startTimestamp, stationId) console.log(resultKMZ) // console.log(util.inspect(resultKMZ, false, null)) - let resultCSV = await readTimeseriesDataMosmix(csvBasePath, startTimestamp, stationId) + let resultCSV = await readTimeseriesDataMosmix(basePath, startTimestampCSV, stationId) console.log(resultCSV) // console.log(util.inspect(resultCSV, false, null)) } diff --git a/package-lock.json b/package-lock.json index d6b2215..75bd31a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4,6 +4,30 @@ "lockfileVersion": 1, "requires": true, "dependencies": { + "bluebird": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.5.2.tgz", + "integrity": "sha512-dhHTWMI7kMx5whMQntl7Vr9C6BvV10lFXDAasnqnrMYhXVCzzk6IO9Fo2L75jXHT07WrOngL1WDXOp+yYS91Yg==" + }, + "child-process-promise": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/child-process-promise/-/child-process-promise-2.2.1.tgz", + "integrity": "sha1-RzChHvYQ+tRQuPIjx50x172tgHQ=", + "requires": { + "cross-spawn": "4.0.2", + "node-version": "1.2.0", + "promise-polyfill": "6.1.0" + } + }, + "cross-spawn": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-4.0.2.tgz", + "integrity": "sha1-e5JHYhwjrf3ThWAEqCPL45dCTUE=", + "requires": { + "lru-cache": "4.1.3", + "which": "1.3.1" + } + }, "fs-extra": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-6.0.1.tgz", @@ -19,6 +43,11 @@ "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.11.tgz", "integrity": "sha1-Dovf5NHduIVNZOBOp8AOKgJuVlg=" }, + "isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha1-6PvzdNxVb/iUehDcsFctYz8s+hA=" + }, "jsonfile": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/jsonfile/-/jsonfile-4.0.0.tgz", @@ -32,21 +61,75 @@ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.10.tgz", "integrity": "sha512-UejweD1pDoXu+AD825lWwp4ZGtSwgnpZxb3JDViD7StjQz+Nb/6l093lx4OQ0foGWNRoc19mWy7BzL+UAK2iVg==" }, + "lru-cache": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-4.1.3.tgz", + "integrity": "sha512-fFEhvcgzuIoJVUF8fYr5KR0YqxD238zgObTps31YdADwPPAp82a4M8TrckkWyx7ekNlf9aBcVn81cFwwXngrJA==", + "requires": { + "pseudomap": "1.0.2", + "yallist": "2.1.2" + } + }, "moment": { "version": "2.22.2", "resolved": "https://registry.npmjs.org/moment/-/moment-2.22.2.tgz", "integrity": "sha1-PCV/mDn8DpP/UxSWMiOeuQeD/2Y=" }, + "node-version": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/node-version/-/node-version-1.2.0.tgz", + "integrity": "sha512-ma6oU4Sk0qOoKEAymVoTvk8EdXEobdS7m/mAGhDJ8Rouugho48crHBORAmy5BoOcv8wraPM6xumapQp5hl4iIQ==" + }, + "os-tmpdir": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", + "integrity": "sha1-u+Z0BseaqFxc/sdm/lc0VV36EnQ=" + }, + "promise-polyfill": { + "version": "6.1.0", + "resolved": "https://registry.npmjs.org/promise-polyfill/-/promise-polyfill-6.1.0.tgz", + "integrity": "sha1-36lpQ+qcEh/KTem1hoyznTRy4Fc=" + }, + "pseudomap": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/pseudomap/-/pseudomap-1.0.2.tgz", + "integrity": "sha1-8FKijacOYYkX7wqKw0wa5aaChrM=" + }, "sax": { "version": "1.2.4", "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.4.tgz", "integrity": "sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==" }, + "tmp": { + "version": "0.0.33", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.0.33.tgz", + "integrity": "sha512-jRCJlojKnZ3addtTOjdIqoRuPEKBvNXcGYqzO6zWZX8KfKEpnGY5jfggJQ3EjKuu8D4bJRr0y+cYJFmYbImXGw==", + "requires": { + "os-tmpdir": "1.0.2" + } + }, + "tmp-promise": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/tmp-promise/-/tmp-promise-1.0.5.tgz", + "integrity": "sha512-hOabTz9Tp49wCozFwuJe5ISrOqkECm6kzw66XTP23DuzNU7QS/KiZq5LC9Y7QSy8f1rPSLy4bKaViP0OwGI1cA==", + "requires": { + "bluebird": "3.5.2", + "tmp": "0.0.33" + } + }, "universalify": { "version": "0.1.1", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.1.tgz", "integrity": "sha1-+nG63UQ3r0wUiEHjs7Fl+enlkLc=" }, + "which": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/which/-/which-1.3.1.tgz", + "integrity": "sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==", + "requires": { + "isexe": "2.0.0" + } + }, "xml-js": { "version": "1.6.7", "resolved": "https://registry.npmjs.org/xml-js/-/xml-js-1.6.7.tgz", @@ -54,6 +137,11 @@ "requires": { "sax": "1.2.4" } + }, + "yallist": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-2.1.2.tgz", + "integrity": "sha1-HBH5IY8HYImkfdUS+TxmmaaoHVI=" } } } diff --git a/package.json b/package.json index 8535c49..d15a35d 100644 --- a/package.json +++ b/package.json @@ -9,9 +9,11 @@ "author": "Florian Wagner ", "license": "ISC", "dependencies": { + "child-process-promise": "^2.2.1", "fs-extra": "^6.0.1", "lodash": "^4.17.10", "moment": "^2.22.2", + "tmp-promise": "^1.0.5", "xml-js": "^1.6.7" } }